mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-07 21:27:04 +08:00
feature: add support for encoding front end command to patch preamble
Related-To: NEO-15376 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
e88de52133
commit
b0e9267e62
@@ -496,7 +496,7 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||
*csr);
|
||||
}
|
||||
|
||||
static_cast<CommandQueueHw<gfxCoreFamily> *>(this->cmdQImmediate)->patchCommands(*this, 0u, false);
|
||||
static_cast<CommandQueueHw<gfxCoreFamily> *>(this->cmdQImmediate)->patchCommands(*this, 0u, false, nullptr);
|
||||
} else {
|
||||
lockForIndirect = std::move(*outerLockForIndirect);
|
||||
cmdQImp->makeResidentForResidencyContainer(this->commandContainer.getResidencyContainer());
|
||||
|
||||
@@ -54,7 +54,8 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
uint32_t perThreadScratchSpaceSlot1Size);
|
||||
|
||||
bool getPreemptionCmdProgramming() override;
|
||||
void patchCommands(CommandList &commandList, uint64_t scratchAddress, bool patchNewScratchController);
|
||||
void patchCommands(CommandList &commandList, uint64_t scratchAddress, bool patchNewScratchController,
|
||||
void **patchPreambleBuffer);
|
||||
|
||||
protected:
|
||||
struct CommandListExecutionContext {
|
||||
@@ -154,6 +155,7 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
inline size_t estimateCommandListSecondaryStart(CommandList *commandList);
|
||||
inline size_t estimateCommandListPrimaryStart(bool required);
|
||||
inline size_t estimateCommandListPatchPreamble(CommandListExecutionContext &ctx, uint32_t numCommandLists);
|
||||
inline size_t estimateCommandListPatchPreambleFrontEndCmd(CommandListExecutionContext &ctx, CommandList *commandList);
|
||||
inline void retrivePatchPreambleSpace(CommandListExecutionContext &ctx, NEO::LinearStream &commandStream);
|
||||
inline void dispatchPatchPreambleEnding(CommandListExecutionContext &ctx);
|
||||
inline size_t estimateCommandListResidencySize(CommandList *commandList);
|
||||
|
||||
@@ -262,6 +262,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateStreamSizeForExecuteCommandListsRe
|
||||
linearStreamSizeEstimate += this->estimateCommandListPrimaryStart(ctx.globalInit || this->forceBbStartJump);
|
||||
for (uint32_t i = 0; i < numCommandLists; i++) {
|
||||
auto cmdList = CommandList::fromHandle(commandListHandles[i]);
|
||||
linearStreamSizeEstimate += estimateCommandListPatchPreambleFrontEndCmd(ctx, cmdList);
|
||||
linearStreamSizeEstimate += estimateCommandListSecondaryStart(cmdList);
|
||||
}
|
||||
|
||||
@@ -908,6 +909,19 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListPrimaryStart(bool requi
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Estimates the space needed to encode the front end commands from the command list's patch list
// into the patch preamble.
// Returns 0 and leaves ctx unchanged when this->patchingPreamble is false.
// Otherwise the encode size of a single front end command is multiplied by
// commandList->getFrontEndPatchListCount(); the result is added to ctx.bufferSpaceForPatchPreamble
// and returned.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListPatchPreambleFrontEndCmd(CommandListExecutionContext &ctx, CommandList *commandList) {
|
||||
size_t encodeSize = 0;
|
||||
if (this->patchingPreamble) {
|
||||
// size of the front end command itself, then the size of the commands needed to encode that many bytes
const size_t feCmdSize = NEO::PreambleHelper<GfxFamily>::getVFECommandsSize();
|
||||
size_t singleFeCmdEncodeSize = NEO::EncodeDataMemory<GfxFamily>::getCommandSizeForEncode(feCmdSize);
|
||||
|
||||
// one encode sequence per front end entry reported by the command list
encodeSize = singleFeCmdEncodeSize * commandList->getFrontEndPatchListCount();
|
||||
// accumulate into the context so the total patch preamble buffer space grows with each command list
ctx.bufferSpaceForPatchPreamble += encodeSize;
|
||||
}
|
||||
return encodeSize;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListPatchPreamble(CommandListExecutionContext &ctx, uint32_t numCommandLists) {
|
||||
size_t encodeSize = 0;
|
||||
@@ -920,7 +934,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListPatchPreamble(CommandLi
|
||||
encodeSize += NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForSingleBarrier();
|
||||
encodeSize += 2 * NEO::EncodeMiArbCheck<GfxFamily>::getCommandSize();
|
||||
|
||||
ctx.bufferSpaceForPatchPreamble = encodeSize;
|
||||
ctx.bufferSpaceForPatchPreamble += encodeSize;
|
||||
|
||||
// patch preamble dispatched into queue's buffer forces not to use cmdlist as a starting buffer
|
||||
this->forceBbStartJump = true;
|
||||
@@ -1021,6 +1035,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
|
||||
const NEO::StreamProperties &requiredStreamState = cmdList->getRequiredStreamState();
|
||||
const NEO::StreamProperties &finalStreamState = cmdList->getFinalStreamState();
|
||||
|
||||
linearStreamSizeEstimate += estimateCommandListPatchPreambleFrontEndCmd(ctx, cmdList);
|
||||
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirty, cmdList,
|
||||
streamProperties, requiredStreamState, finalStreamState,
|
||||
cmdListState.requiredState,
|
||||
@@ -1902,7 +1917,7 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, Comm
|
||||
}
|
||||
}
|
||||
|
||||
patchCommands(commandList, scratchAddress, patchNewScratchController);
|
||||
patchCommands(commandList, scratchAddress, patchNewScratchController, &ctx.currentPatchPreambleBuffer);
|
||||
|
||||
if (patchNewScratchController) {
|
||||
commandList.setCommandListUsedScratchController(ctx.scratchSpaceController);
|
||||
|
||||
@@ -132,7 +132,8 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &heapC
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint64_t scratchAddress,
|
||||
bool patchNewScratchController) {
|
||||
bool patchNewScratchController,
|
||||
void **patchPreambleBuffer) {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
|
||||
@@ -164,7 +164,8 @@ void CommandQueueHw<gfxCoreFamily>::handleScratchSpace(NEO::HeapContainer &sshHe
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint64_t scratchAddress,
|
||||
bool patchNewScratchController) {
|
||||
bool patchNewScratchController,
|
||||
void **patchPreambleBuffer) {
|
||||
using MI_SEMAPHORE_WAIT = typename GfxFamily::MI_SEMAPHORE_WAIT;
|
||||
using COMPARE_OPERATION = typename GfxFamily::MI_SEMAPHORE_WAIT::COMPARE_OPERATION;
|
||||
|
||||
@@ -181,7 +182,11 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
|
||||
cfeStateCmd->setScratchSpaceBuffer(lowScratchAddress);
|
||||
NEO::PreambleHelper<GfxFamily>::setSingleSliceDispatchMode(cfeStateCmd, false);
|
||||
|
||||
*reinterpret_cast<CFE_STATE *>(commandToPatch.pDestination) = *cfeStateCmd;
|
||||
if (this->patchingPreamble) {
|
||||
NEO::EncodeDataMemory<GfxFamily>::programDataMemory(*patchPreambleBuffer, commandToPatch.gpuAddress, commandToPatch.pCommand, sizeof(CFE_STATE));
|
||||
} else {
|
||||
*reinterpret_cast<CFE_STATE *>(commandToPatch.pDestination) = *cfeStateCmd;
|
||||
}
|
||||
break;
|
||||
} else {
|
||||
UNRECOVERABLE_IF(true);
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/command_stream/scratch_space_controller.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/api_specific_config.h"
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
@@ -1524,6 +1525,103 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenHeapfulSupportWhenAppendVfeStateCm
|
||||
}
|
||||
}
|
||||
|
||||
// Verifies that, with patching preamble enabled on the queue, the two front end (CFE_STATE) commands
// held in the command list's patch list show up in the queue's command stream as MI_STORE_DATA_IMM
// commands after executeCommandLists: the store addresses must match the patch entries' gpuAddress and
// the stored payload must reassemble into a CFE_STATE equal to the patch entries' pCommand.
// Skipped when the family requires heapless mode.
HWTEST2_F(CommandListAppendLaunchKernel, GivenPatchPreambleActiveWhenExecutingCommandListWithFrontEndCmdInPatchListThenExpectPatchPreambleEncoding, IsAtLeastXeCore) {
|
||||
if constexpr (FamilyType::isHeaplessRequired() == true) {
|
||||
GTEST_SKIP();
|
||||
} else {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
|
||||
ze_result_t returnValue;
|
||||
ze_command_queue_desc_t queueDesc{ZE_STRUCTURE_TYPE_COMMAND_QUEUE_DESC};
|
||||
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
&queueDesc,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
returnValue = commandList->initialize(device, NEO::EngineGroupType::compute, 0u);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
commandList->setCommandListPerThreadScratchSize(0, 0x1000);
|
||||
|
||||
// capture the stream GPU position before each append; the patch entry is expected to record it as gpuAddress
auto expectedGpuAddress = commandList->getCmdContainer().getCommandStream()->getCurrentGpuAddressPosition();
|
||||
commandList->appendVfeStateCmdToPatch();
|
||||
ASSERT_NE(0u, commandList->commandsToPatch.size());
|
||||
EXPECT_EQ(CommandToPatch::FrontEndState, commandList->commandsToPatch[0].type);
|
||||
EXPECT_EQ(expectedGpuAddress, commandList->commandsToPatch[0].gpuAddress);
|
||||
EXPECT_EQ(1u, commandList->getFrontEndPatchListCount());
|
||||
|
||||
auto expectedGpuAddress2 = commandList->getCmdContainer().getCommandStream()->getCurrentGpuAddressPosition();
|
||||
commandList->appendVfeStateCmdToPatch();
|
||||
EXPECT_EQ(CommandToPatch::FrontEndState, commandList->commandsToPatch[1].type);
|
||||
EXPECT_EQ(expectedGpuAddress2, commandList->commandsToPatch[1].gpuAddress);
|
||||
EXPECT_EQ(2u, commandList->getFrontEndPatchListCount());
|
||||
|
||||
commandList->close();
|
||||
|
||||
// host-side copies of both front end commands, compared against the decoded SDI payload at the end
void *cfeInputPtr = commandList->commandsToPatch[0].pCommand;
|
||||
void *cfeInputPtr2 = commandList->commandsToPatch[1].pCommand;
|
||||
|
||||
commandQueue->setPatchingPreamble(true);
|
||||
|
||||
// remember the queue stream usage so that only commands added by executeCommandLists are parsed
void *queueCpuBase = commandQueue->commandStream.getCpuBase();
|
||||
auto usedSpaceBefore = commandQueue->commandStream.getUsed();
|
||||
returnValue = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false, nullptr, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);
|
||||
auto usedSpaceAfter = commandQueue->commandStream.getUsed();
|
||||
auto scratchAddress = static_cast<uint32_t>(commandQueue->getCsr()->getScratchSpaceController()->getScratchPatchAddress());
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(queueCpuBase, usedSpaceBefore),
|
||||
usedSpaceAfter - usedSpaceBefore));
|
||||
|
||||
// dword buffers used to reassemble each CFE_STATE from the SDI payloads
uint32_t cfeStateDwordBuffer[sizeof(CFE_STATE) / sizeof(uint32_t)] = {0};
|
||||
uint32_t cfeStateDwordBuffer2[sizeof(CFE_STATE) / sizeof(uint32_t)] = {0};
|
||||
|
||||
auto sdiCmds = findAll<MI_STORE_DATA_IMM *>(cmdList.begin(), cmdList.end());
|
||||
// NOTE(review): requires more than 6 SDIs although only the first 6 are inspected below -
// presumably further SDIs are expected in the stream; confirm
ASSERT_LT(6u, sdiCmds.size());
|
||||
|
||||
// CFE_STATE size is qword aligned and these are the only commands dispatched into the command list,
// so the optimal number of SDIs is 3 qword stores per CFE_STATE
for (uint32_t i = 0; i < 3; i++) {
|
||||
// SDIs 0-2 are checked against the first CFE_STATE, SDIs 3-5 against the second
auto storeDataImmForCfe = reinterpret_cast<MI_STORE_DATA_IMM *>(*sdiCmds[i]);
|
||||
auto storeDataImmForCfe2 = reinterpret_cast<MI_STORE_DATA_IMM *>(*sdiCmds[i + 3]);
|
||||
|
||||
EXPECT_EQ(expectedGpuAddress + i * sizeof(uint64_t), storeDataImmForCfe->getAddress());
|
||||
EXPECT_EQ(expectedGpuAddress2 + i * sizeof(uint64_t), storeDataImmForCfe2->getAddress());
|
||||
|
||||
EXPECT_TRUE(storeDataImmForCfe->getStoreQword());
|
||||
EXPECT_TRUE(storeDataImmForCfe2->getStoreQword());
|
||||
|
||||
cfeStateDwordBuffer[2 * i] = storeDataImmForCfe->getDataDword0();
|
||||
cfeStateDwordBuffer[2 * i + 1] = storeDataImmForCfe->getDataDword1();
|
||||
|
||||
cfeStateDwordBuffer2[2 * i] = storeDataImmForCfe2->getDataDword0();
|
||||
cfeStateDwordBuffer2[2 * i + 1] = storeDataImmForCfe2->getDataDword1();
|
||||
}
|
||||
|
||||
// the reassembled payloads must decode as CFE_STATE and carry the scratch patch address
auto cfeEncodedCmd = genCmdCast<CFE_STATE *>(cfeStateDwordBuffer);
|
||||
ASSERT_NE(nullptr, cfeEncodedCmd);
|
||||
EXPECT_EQ(scratchAddress, cfeEncodedCmd->getScratchSpaceBuffer());
|
||||
auto cfeEncodedCmd2 = genCmdCast<CFE_STATE *>(cfeStateDwordBuffer2);
|
||||
ASSERT_NE(nullptr, cfeEncodedCmd2);
|
||||
EXPECT_EQ(scratchAddress, cfeEncodedCmd2->getScratchSpaceBuffer());
|
||||
|
||||
// encoded payload must be identical to the command list's host copy of each command
EXPECT_EQ(0, memcmp(cfeInputPtr, cfeStateDwordBuffer, sizeof(CFE_STATE)));
|
||||
EXPECT_EQ(0, memcmp(cfeInputPtr2, cfeStateDwordBuffer2, sizeof(CFE_STATE)));
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(CommandListAppendLaunchKernel, whenUpdateStreamPropertiesIsCalledThenCorrectThreadArbitrationPolicyIsSet, IsHeapfulSupported) {
|
||||
DebugManagerStateRestore restorer;
|
||||
debugManager.flags.ForceThreadArbitrationPolicyProgrammingWithScm.set(1);
|
||||
|
||||
@@ -981,9 +981,9 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr
|
||||
auto commandQueue = std::make_unique<MockCommandQueueHw<FamilyType::gfxCoreFamily>>(device, csr, &desc);
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
|
||||
EXPECT_NO_THROW(commandQueue->patchCommands(*commandList, 0, false));
|
||||
EXPECT_NO_THROW(commandQueue->patchCommands(*commandList, 0, false, nullptr));
|
||||
commandList->commandsToPatch.push_back({});
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false));
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false, nullptr));
|
||||
commandList->commandsToPatch.clear();
|
||||
|
||||
if constexpr (FamilyType::isHeaplessRequired()) {
|
||||
@@ -992,7 +992,7 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr
|
||||
commandToPatch.pCommand = nullptr;
|
||||
commandToPatch.type = CommandToPatch::FrontEndState;
|
||||
commandList->commandsToPatch.push_back(commandToPatch);
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false));
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false, nullptr));
|
||||
commandList->commandsToPatch.clear();
|
||||
} else {
|
||||
using CFE_STATE = typename FamilyType::CFE_STATE;
|
||||
@@ -1021,7 +1021,7 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr
|
||||
}
|
||||
|
||||
uint64_t patchedScratchAddress = 0xABCD00;
|
||||
commandQueue->patchCommands(*commandList, patchedScratchAddress, false);
|
||||
commandQueue->patchCommands(*commandList, patchedScratchAddress, false, nullptr);
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
EXPECT_EQ(patchedScratchAddress, destinationCfeStates[i].getScratchSpaceBuffer());
|
||||
auto &sourceCfeState = *reinterpret_cast<CFE_STATE *>(commandList->commandsToPatch[i].pCommand);
|
||||
@@ -1041,21 +1041,21 @@ HWTEST2_F(CommandQueueScratchTests, givenCommandsToPatchToNotSupportedPlatformWh
|
||||
auto commandQueue = std::make_unique<MockCommandQueueHw<FamilyType::gfxCoreFamily>>(device, csr, &desc);
|
||||
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
|
||||
|
||||
EXPECT_NO_THROW(commandQueue->patchCommands(*commandList, 0, false));
|
||||
EXPECT_NO_THROW(commandQueue->patchCommands(*commandList, 0, false, nullptr));
|
||||
commandList->commandsToPatch.push_back({});
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false));
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false, nullptr));
|
||||
commandList->commandsToPatch.clear();
|
||||
|
||||
CommandToPatch commandToPatch;
|
||||
|
||||
commandToPatch.type = CommandToPatch::FrontEndState;
|
||||
commandList->commandsToPatch.push_back(commandToPatch);
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false));
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false, nullptr));
|
||||
commandList->commandsToPatch.clear();
|
||||
|
||||
commandToPatch.type = CommandToPatch::Invalid;
|
||||
commandList->commandsToPatch.push_back(commandToPatch);
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false));
|
||||
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, 0, false, nullptr));
|
||||
commandList->commandsToPatch.clear();
|
||||
}
|
||||
|
||||
@@ -1100,7 +1100,7 @@ HWTEST2_F(CommandQueueScratchTests, givenInlineDataScratchWhenPatchCommandsIsCal
|
||||
cmd.scratchAddressAfterPatch = testCase.scratchAlreadyPatched ? scratchAddress : 0;
|
||||
|
||||
commandList->commandsToPatch.push_back(cmd);
|
||||
commandQueue->patchCommands(*commandList, scratchAddress, testCase.scratchControllerChanged);
|
||||
commandQueue->patchCommands(*commandList, scratchAddress, testCase.scratchControllerChanged, nullptr);
|
||||
|
||||
EXPECT_EQ(testCase.expectedValue, scratchBuffer);
|
||||
}
|
||||
@@ -1133,7 +1133,7 @@ HWTEST2_F(CommandQueueScratchTests, givenImplicitArgsScratchWhenPatchCommandsIsC
|
||||
cmd.scratchAddressAfterPatch = scratchAlreadyPatched ? scratchAddress : 0;
|
||||
|
||||
commandList->commandsToPatch.push_back(cmd);
|
||||
commandQueue->patchCommands(*commandList, scratchAddress, scratchControllerChanged);
|
||||
commandQueue->patchCommands(*commandList, scratchAddress, scratchControllerChanged, nullptr);
|
||||
|
||||
EXPECT_EQ(expectedValue, scratchBuffer);
|
||||
}
|
||||
@@ -1162,12 +1162,12 @@ HWTEST_F(CommandQueueCreate, givenCommandsToPatchWithNoopSpacePatchWhenPatchComm
|
||||
commandToPatch.patchSize = dataSize;
|
||||
|
||||
commandList->commandsToPatch.push_back(commandToPatch);
|
||||
commandQueue->patchCommands(*commandList, 0, false);
|
||||
commandQueue->patchCommands(*commandList, 0, false, nullptr);
|
||||
EXPECT_EQ(0, memcmp(patchBuffer.get(), zeroBuffer.get(), dataSize));
|
||||
|
||||
memset(patchBuffer.get(), 0xFF, dataSize);
|
||||
commandList->commandsToPatch[0].pDestination = nullptr;
|
||||
commandQueue->patchCommands(*commandList, 0, false);
|
||||
commandQueue->patchCommands(*commandList, 0, false, nullptr);
|
||||
EXPECT_NE(0, memcmp(patchBuffer.get(), zeroBuffer.get(), dataSize));
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user