mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-21 09:14:47 +08:00
feature: handle passing separate epilogue immediate command buffer
Related-To: NEO-10356
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
bb61dafd72
commit
2b370f6a6f
@@ -240,6 +240,7 @@ struct CommandListCoreFamilyImmediate : public CommandListCoreFamily<gfxCoreFami
|
||||
void allocateOrReuseKernelPrivateMemoryIfNeeded(Kernel *kernel, uint32_t sizePerHwThread) override;
|
||||
void handleInOrderNonWalkerSignaling(Event *event, bool &hasStallingCmds, bool &relaxedOrderingDispatch, ze_result_t &result);
|
||||
CommandQueue *getCmdQImmediate(bool copyOffloadOperation) const;
|
||||
NEO::LinearStream *getOptionalEpilogueCmdStream(NEO::LinearStream *taskCmdStream, NEO::AppendOperations appendOperation);
|
||||
|
||||
MOCKABLE_VIRTUAL void checkAssert();
|
||||
ComputeFlushMethodType computeFlushMethod = nullptr;
|
||||
|
||||
@@ -119,14 +119,12 @@ void CommandListCoreFamilyImmediate<gfxCoreFamily>::updateDispatchFlagsWithRequi
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushBcsTask(NEO::LinearStream &cmdStreamTask, size_t taskStartOffset, bool hasStallingCmds, bool hasRelaxedOrderingDependencies, bool requireTaskCountUpdate, NEO::AppendOperations appendOperation, NEO::CommandStreamReceiver *csr) {
|
||||
NEO::LinearStream *optionalEpilogueCmdStream = nullptr;
|
||||
|
||||
NEO::DispatchBcsFlags dispatchBcsFlags(
|
||||
this->isSyncModeQueue || requireTaskCountUpdate, // flushTaskCount
|
||||
hasStallingCmds, // hasStallingCmds
|
||||
hasRelaxedOrderingDependencies // hasRelaxedOrderingDependencies
|
||||
);
|
||||
dispatchBcsFlags.optionalEpilogueCmdStream = optionalEpilogueCmdStream;
|
||||
dispatchBcsFlags.optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
|
||||
dispatchBcsFlags.dispatchOperation = appendOperation;
|
||||
|
||||
CommandListImp::storeReferenceTsToMappedEvents(true);
|
||||
@@ -261,7 +259,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
|
||||
handleHeapsAndResidencyForImmediateRegularTask<streamStatesSupported>(sshCpuPointer);
|
||||
}
|
||||
|
||||
NEO::LinearStream *optionalEpilogueCmdStream = nullptr;
|
||||
NEO::LinearStream *optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
|
||||
|
||||
NEO::ImmediateDispatchFlags dispatchFlags{
|
||||
&this->requiredStreamState, // requiredState
|
||||
@@ -292,7 +290,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
|
||||
handleHeapsAndResidencyForImmediateRegularTask<streamStatesSupported>(sshCpuPointer);
|
||||
}
|
||||
|
||||
NEO::LinearStream *optionalEpilogueCmdStream = nullptr;
|
||||
NEO::LinearStream *optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
|
||||
|
||||
NEO::ImmediateDispatchFlags dispatchFlags{
|
||||
nullptr, // requiredState
|
||||
@@ -332,7 +330,7 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||
this->isSyncModeQueue, // blocking
|
||||
this->isSyncModeQueue, // dcFlush
|
||||
this->getCommandListSLMEnable(), // useSLM
|
||||
this->isSyncModeQueue, // guardCommandBufferWithPipeControl
|
||||
this->isSyncModeQueue || requireTaskCountUpdate, // guardCommandBufferWithPipeControl
|
||||
false, // gsba32BitRequired
|
||||
false, // lowPriority
|
||||
true, // implicitFlush
|
||||
@@ -349,6 +347,8 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
|
||||
false // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
||||
);
|
||||
|
||||
dispatchFlags.optionalEpilogueCmdStream = getOptionalEpilogueCmdStream(&cmdStreamTask, appendOperation);
|
||||
|
||||
auto ioh = (this->commandContainer.getIndirectHeap(NEO::IndirectHeap::Type::indirectObject));
|
||||
NEO::IndirectHeap *dsh = nullptr;
|
||||
NEO::IndirectHeap *ssh = nullptr;
|
||||
@@ -436,6 +436,14 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||
|
||||
auto commandStream = this->commandContainer.getCommandStream();
|
||||
size_t commandStreamStart = this->cmdListCurrentStartOffset;
|
||||
if (appendOperation == NEO::AppendOperations::cmdList && this->dispatchCmdListBatchBufferAsPrimary) {
|
||||
auto cmdListStartCmdBufferStream = reinterpret_cast<CommandQueueImp *>(cmdQ)->getStartingCmdBuffer();
|
||||
// check if queue starting stream is the same as immediate, if not - regular cmdlist is the starting command buffer
|
||||
if (cmdListStartCmdBufferStream != commandStream) {
|
||||
commandStream = cmdListStartCmdBufferStream;
|
||||
commandStreamStart = 0u;
|
||||
}
|
||||
}
|
||||
|
||||
auto csr = static_cast<CommandQueueImp *>(cmdQ)->getCsr();
|
||||
auto lockCSR = outerLock != nullptr ? std::move(*outerLock) : csr->obtainUniqueOwnership();
|
||||
@@ -494,7 +502,8 @@ inline ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::executeCommand
|
||||
auto cmdQImp = static_cast<CommandQueueImp *>(cmdQ);
|
||||
cmdQImp->clearHeapContainer();
|
||||
|
||||
this->cmdListCurrentStartOffset = commandStream->getUsed();
|
||||
// save offset from immediate stream - even when not used to dispatch commands, can be used for epilogue
|
||||
this->cmdListCurrentStartOffset = this->commandContainer.getCommandStream()->getUsed();
|
||||
this->containsAnyKernel = false;
|
||||
this->handlePostSubmissionState();
|
||||
|
||||
@@ -1186,6 +1195,18 @@ CommandQueue *CommandListCoreFamilyImmediate<gfxCoreFamily>::getCmdQImmediate(bo
|
||||
return copyOffloadOperation ? this->cmdQImmediateCopyOffload : this->cmdQImmediate;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
NEO::LinearStream *CommandListCoreFamilyImmediate<gfxCoreFamily>::getOptionalEpilogueCmdStream(NEO::LinearStream *taskCmdStream, NEO::AppendOperations appendOperation) {
|
||||
if (appendOperation == NEO::AppendOperations::cmdList && this->dispatchCmdListBatchBufferAsPrimary) {
|
||||
auto commandStream = this->commandContainer.getCommandStream();
|
||||
// when regular cmd list is present as main command buffer, provide immediate command stream for epilogue
|
||||
if (commandStream != taskCmdStream) {
|
||||
return commandStream;
|
||||
}
|
||||
}
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmediate(ze_result_t inputRet, bool performMigration, bool hasStallingCmds, bool hasRelaxedOrderingDependencies,
|
||||
NEO::AppendOperations appendOperation, bool copyOffloadSubmission, ze_event_handle_t hSignalEvent, bool requireTaskCountUpdate,
|
||||
@@ -1715,7 +1736,7 @@ ze_result_t CommandListCoreFamilyImmediate<gfxCoreFamily>::appendCommandLists(ui
|
||||
}
|
||||
|
||||
bool hasStallingCmds = true;
|
||||
return flushImmediate(ret, true, hasStallingCmds, relaxedOrderingDispatch, NEO::AppendOperations::kernel, false, hSignalEvent, true, &mainAppendLock);
|
||||
return flushImmediate(ret, true, hasStallingCmds, relaxedOrderingDispatch, NEO::AppendOperations::cmdList, false, hSignalEvent, true, &mainAppendLock);
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2020-2024 Intel Corporation
|
||||
* Copyright (C) 2020-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -30,6 +30,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
||||
using BaseClass::csr;
|
||||
using BaseClass::desc;
|
||||
using BaseClass::device;
|
||||
using BaseClass::firstCmdListStream;
|
||||
using BaseClass::preemptionCmdSyncProgramming;
|
||||
using BaseClass::printfKernelContainer;
|
||||
using BaseClass::startingCmdBuffer;
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
|
||||
#include "level_zero/core/source/cmdqueue/cmdqueue_imp.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
|
||||
#include "level_zero/core/source/image/image_hw.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/host_pointer_manager_fixture.h"
|
||||
@@ -1591,5 +1592,109 @@ HWTEST2_F(ImmediateCommandListTest, givenImmediateCmdListWhenAppendingRegularThe
|
||||
}
|
||||
}
|
||||
|
||||
// Appends a regular command list to an immediate command list twice with primary batch buffer
// dispatch enabled and verifies, after the second append, which stream was flushed as the task
// stream and which one was passed as the optional epilogue stream.
HWTEST2_F(ImmediateCommandListTest,
          givenImmediateCmdListWithPrimaryBatchBufferWhenAppendingRegularCmdListThenCorrectEpilogueCmdBufferIsUsed, MatchAny) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    auto &csr = neoDevice->getUltCommandStreamReceiver<FamilyType>();

    // close the regular command list and remember its command buffer allocation
    commandList->close();
    auto regularHandle = commandList->toHandle();
    auto regularAllocation = commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation();

    auto immediateQueue = static_cast<WhiteBox<::L0::CommandQueue> *>(commandListImmediate->cmdQImmediate);

    // enable primary batch buffer dispatch on both the immediate command list and its queue
    commandListImmediate->dispatchCmdListBatchBufferAsPrimary = true;
    immediateQueue->dispatchCmdListBatchBufferAsPrimary = true;
    auto expectedTaskStream = &immediateQueue->firstCmdListStream;

    // first append can carry preamble
    commandListImmediate->appendCommandLists(1, &regularHandle, nullptr, 0, nullptr);

    csr.recordFlushedBatchBuffer = true;

    // remember how much of the immediate stream was consumed before the second append
    auto immediateStream = commandListImmediate->getCmdContainer().getCommandStream();
    const auto usedBeforeSecondAppend = immediateStream->getUsed();

    // no preamble - regular cmdlist buffer will be first and immediate cmd buffer will be epilogue
    commandListImmediate->appendCommandLists(1, &regularHandle, nullptr, 0, nullptr);

    const bool immediateFlushTask = L0GfxCoreHelper::useImmediateComputeFlushTask(device->getNEODevice()->getRootDeviceEnvironment());
    if (immediateFlushTask) {
        EXPECT_EQ(NEO::AppendOperations::cmdList, csr.recordedImmediateDispatchFlags.dispatchOperation);
        EXPECT_EQ(expectedTaskStream, csr.lastFlushedImmediateCommandStream);
        EXPECT_EQ(immediateStream, csr.recordedImmediateDispatchFlags.optionalEpilogueCmdStream);
    } else {
        EXPECT_EQ(expectedTaskStream, csr.lastFlushedCommandStream);
        EXPECT_EQ(immediateStream, csr.recordedDispatchFlags.optionalEpilogueCmdStream);
    }
    EXPECT_EQ(regularAllocation, csr.latestFlushedBatchBuffer.commandBufferAllocation);

    auto queueImp = static_cast<L0::CommandQueueImp *>(commandListImmediate->cmdQImmediate);
    EXPECT_EQ(expectedTaskStream, queueImp->getStartingCmdBuffer());

    // parse only the part of the immediate stream written by the second append
    auto appendedStart = ptrOffset(immediateStream->getCpuBase(), usedBeforeSecondAppend);
    auto appendedSize = immediateStream->getUsed() - usedBeforeSecondAppend;

    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(parsedCmds, appendedStart, appendedSize));

    auto bbEndIt = find<MI_BATCH_BUFFER_END *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_NE(parsedCmds.end(), bbEndIt);
}
|
||||
|
||||
// Copy engine variant: recreates the regular and immediate command lists with the copy engine
// group, appends the regular list twice with primary batch buffer dispatch enabled and verifies
// the BCS dispatch flags and streams recorded for the second append.
HWTEST2_F(ImmediateCommandListTest,
          givenCopyEngineImmediateCmdListWithPrimaryBatchBufferWhenAppendingRegularCmdListThenCorrectEpilogueCmdBufferIsUsed, MatchAny) {
    using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;

    ze_result_t returnValue;

    // replace the fixture's regular command list with a copy engine one and close it
    commandList.reset(CommandList::whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::copy, 0u, returnValue, false)));
    commandList->close();
    auto regularHandle = commandList->toHandle();

    // replace the fixture's immediate command list with a copy engine one
    ze_command_queue_desc_t desc = {};
    commandListImmediate.reset(CommandList::whiteboxCast(CommandList::createImmediate(productFamily, device, &desc, false, NEO::EngineGroupType::copy, returnValue)));

    auto regularAllocation = commandList->getCmdContainer().getCommandStream()->getGraphicsAllocation();

    auto immediateQueue = static_cast<WhiteBox<::L0::CommandQueue> *>(commandListImmediate->cmdQImmediate);
    auto csr = static_cast<UltCommandStreamReceiver<FamilyType> *>(immediateQueue->csr);

    // enable primary batch buffer dispatch on both the immediate command list and its queue
    commandListImmediate->dispatchCmdListBatchBufferAsPrimary = true;
    immediateQueue->dispatchCmdListBatchBufferAsPrimary = true;
    auto expectedTaskStream = &immediateQueue->firstCmdListStream;

    // first append can carry preamble
    commandListImmediate->appendCommandLists(1, &regularHandle, nullptr, 0, nullptr);

    csr->recordFlushedBatchBuffer = true;

    // remember how much of the immediate stream was consumed before the second append
    auto immediateStream = commandListImmediate->getCmdContainer().getCommandStream();
    const auto usedBeforeSecondAppend = immediateStream->getUsed();

    // no preamble - regular cmdlist buffer will be first and immediate cmd buffer will be epilogue
    commandListImmediate->appendCommandLists(1, &regularHandle, nullptr, 0, nullptr);

    EXPECT_EQ(NEO::AppendOperations::cmdList, csr->recordedBcsDispatchFlags.dispatchOperation);
    EXPECT_EQ(expectedTaskStream, csr->lastFlushedBcsCommandStream);
    EXPECT_EQ(immediateStream, csr->recordedBcsDispatchFlags.optionalEpilogueCmdStream);
    EXPECT_EQ(regularAllocation, csr->latestFlushedBatchBuffer.commandBufferAllocation);

    auto queueImp = static_cast<L0::CommandQueueImp *>(commandListImmediate->cmdQImmediate);
    EXPECT_EQ(expectedTaskStream, queueImp->getStartingCmdBuffer());

    // parse only the part of the immediate stream written by the second append
    auto appendedStart = ptrOffset(immediateStream->getCpuBase(), usedBeforeSecondAppend);
    auto appendedSize = immediateStream->getUsed() - usedBeforeSecondAppend;

    GenCmdList parsedCmds;
    ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(parsedCmds, appendedStart, appendedSize));

    auto bbEndIt = find<MI_BATCH_BUFFER_END *>(parsedCmds.begin(), parsedCmds.end());
    EXPECT_NE(parsedCmds.end(), bbEndIt);
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2019-2024 Intel Corporation
|
||||
* Copyright (C) 2019-2025 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -46,4 +46,8 @@ struct DispatchFlagsHelper {
|
||||
false // isDcFlushRequiredOnStallingCommandsOnNextFlush
|
||||
);
|
||||
}
|
||||
|
||||
// Builds a DispatchBcsFlags object with every constructor argument set to false.
static DispatchBcsFlags createDefaultBcsDispatchFlags() {
    DispatchBcsFlags defaultBcsFlags(
        false, // flushTaskCount
        false, // hasStallingCmds
        false  // hasRelaxedOrderingDependencies
    );
    return defaultBcsFlags;
}
|
||||
};
|
||||
|
||||
@@ -177,7 +177,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
||||
uint32_t rootDeviceIndex,
|
||||
const DeviceBitfield deviceBitfield)
|
||||
: BaseClass(executionEnvironment, rootDeviceIndex, deviceBitfield), recursiveLockCounter(0),
|
||||
recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()) {
|
||||
recordedDispatchFlags(DispatchFlagsHelper::createDefaultDispatchFlags()),
|
||||
recordedBcsDispatchFlags(DispatchFlagsHelper::createDefaultBcsDispatchFlags()) {
|
||||
this->downloadAllocationImpl = [this](GraphicsAllocation &graphicsAllocation) {
|
||||
this->downloadAllocationUlt(graphicsAllocation);
|
||||
};
|
||||
@@ -238,6 +239,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
||||
Device &device) override {
|
||||
recordedImmediateDispatchFlags = dispatchFlags;
|
||||
this->lastFlushedCommandStream = &commandStream;
|
||||
this->lastFlushedImmediateCommandStream = &immediateCommandStream;
|
||||
return BaseClass::flushImmediateTask(immediateCommandStream, immediateCommandStreamStart, dispatchFlags, device);
|
||||
}
|
||||
|
||||
@@ -247,9 +249,17 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
||||
Device &device) override {
|
||||
recordedImmediateDispatchFlags = dispatchFlags;
|
||||
this->lastFlushedCommandStream = &commandStream;
|
||||
this->lastFlushedImmediateCommandStream = &immediateCommandStream;
|
||||
return BaseClass::flushImmediateTaskStateless(immediateCommandStream, immediateCommandStreamStart, dispatchFlags, device);
|
||||
}
|
||||
|
||||
// Test override: records the task stream and the BCS dispatch flags of the latest call
// so tests can inspect them, then forwards the flush to the base class implementation.
CompletionStamp flushBcsTask(LinearStream &commandStreamTask, size_t commandStreamTaskStart,
                             const DispatchBcsFlags &dispatchBcsFlags, const HardwareInfo &hwInfo) override {
    lastFlushedBcsCommandStream = &commandStreamTask;
    recordedBcsDispatchFlags = dispatchBcsFlags;

    return BaseClass::flushBcsTask(commandStreamTask, commandStreamTaskStart, dispatchBcsFlags, hwInfo);
}
|
||||
|
||||
SubmissionStatus initializeDeviceWithFirstSubmission(Device &device) override {
|
||||
initializeDeviceWithFirstSubmissionCalled++;
|
||||
return BaseClass::initializeDeviceWithFirstSubmission(device);
|
||||
@@ -566,6 +576,8 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
||||
TaskCountType flushBcsTaskReturnValue{};
|
||||
|
||||
LinearStream *lastFlushedCommandStream = nullptr;
|
||||
LinearStream *lastFlushedImmediateCommandStream = nullptr;
|
||||
LinearStream *lastFlushedBcsCommandStream = nullptr;
|
||||
LinearStream *commandStreamHeaplessStateInit = nullptr;
|
||||
|
||||
const IndirectHeap *recordedSsh = nullptr;
|
||||
@@ -588,6 +600,7 @@ class UltCommandStreamReceiver : public CommandStreamReceiverHw<GfxFamily> {
|
||||
mutable uint32_t checkGpuHangDetectedCalled = 0;
|
||||
int ensureCommandBufferAllocationCalled = 0;
|
||||
DispatchFlags recordedDispatchFlags;
|
||||
DispatchBcsFlags recordedBcsDispatchFlags;
|
||||
ImmediateDispatchFlags recordedImmediateDispatchFlags = {};
|
||||
BlitPropertiesContainer receivedBlitProperties = {};
|
||||
uint32_t createAllocationForHostSurfaceCalled = 0;
|
||||
|
||||
Reference in New Issue
Block a user