Mirror of https://github.com/intel/compute-runtime.git
performance: dispatch and chain command list batch buffers as primary
Command list batch buffers should be chained when no dynamic or global preamble is present in the command queue. Return to the command queue when a preamble is required. Chain the last command list to the command queue epilogue. Provide the first command list batch buffer to KMD/ULLS when there is no command queue preamble.

Related-To: NEO-7807
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
Commit f451207372 (parent 2022592f3d), committed by Compute-Runtime-Automation
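A minimal sketch of the chaining policy described above, using simplified, hypothetical types (QueueStream, CommandListInfo, ChainState, writeBbStart) rather than the actual NEO classes; the real logic is programOneCmdListBatchBufferStartPrimaryBatchBuffer and programLastCommandListReturnBbStart in the diff below:

// Illustrative only: one chaining decision per executed command list.
#include <cstdint>

struct QueueStream {
    uint64_t gpuBase = 0;
    uint64_t used = 0;
    uint64_t currentGpuPosition() const { return gpuBase + used; }
    void appendBbStart(uint64_t jumpTo) { used += 8; lastTarget = jumpTo; } // stand-in for MI_BATCH_BUFFER_START encoding
    uint64_t lastTarget = 0;
};

struct CommandListInfo {
    uint64_t firstCmdBufferGpuAddress = 0;
    uint64_t *endBbStartPatch = nullptr; // space reserved at close() for a chaining BB_START
};

struct ChainState {
    bool globalInit = true;              // queue-level (global) preamble still pending
    uint64_t positionBeforePreamble = 0; // queue position captured before each command list
    uint64_t *pendingPatch = nullptr;    // previous command list's ending BB_START
    bool startFromFirstCmdList = false;  // submit the first command list buffer directly to KMD/ULLS
};

inline void writeBbStart(uint64_t *patch, uint64_t target) { *patch = target; } // simplified patch

void chainOneCommandList(ChainState &state, QueueStream &queue, CommandListInfo &cmdList) {
    const bool dynamicPreamble = state.positionBeforePreamble != queue.currentGpuPosition();
    if (state.globalInit || dynamicPreamble) {
        // Preamble present: previous list (if any) jumps back before the preamble,
        // then the queue stream jumps into this command list.
        if (state.pendingPatch) {
            writeBbStart(state.pendingPatch, state.positionBeforePreamble);
        }
        queue.appendBbStart(cmdList.firstCmdBufferGpuAddress);
        state.globalInit = false;
    } else if (state.pendingPatch == nullptr) {
        // No queue preamble at all: the first command list buffer becomes the
        // batch buffer handed to KMD/ULLS.
        state.startFromFirstCmdList = true;
    } else {
        // No preamble needed between lists: chain directly from list to list.
        writeBbStart(state.pendingPatch, cmdList.firstCmdBufferGpuAddress);
    }
    // This list's ending BB_START is patched later: either to the next list or to the queue epilogue.
    state.pendingPatch = cmdList.endBbStartPatch;
}

The diff below applies this decision per command list and finally patches the last list's ending BB_START back to the queue epilogue in programLastCommandListReturnBbStart.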
@@ -124,9 +124,13 @@ NEO::SubmissionStatus CommandQueueImp::submitBatchBuffer(size_t offset, NEO::Res
bool isCooperative) {
UNRECOVERABLE_IF(csr == nullptr);

NEO::BatchBuffer batchBuffer(commandStream.getGraphicsAllocation(), offset, 0, 0, nullptr, false, false,
NEO::BatchBuffer batchBuffer(this->startingCmdBuffer->getGraphicsAllocation(), offset, 0, 0, nullptr, false, false,
NEO::QueueThrottle::HIGH, NEO::QueueSliceCount::defaultSliceCount,
commandStream.getUsed(), &commandStream, endingCmdPtr, csr->getNumClients(), false, false);
this->startingCmdBuffer->getUsed(), this->startingCmdBuffer, endingCmdPtr, csr->getNumClients(), false, false);

if (this->startingCmdBuffer != &this->commandStream) {
this->csr->makeResident(*this->commandStream.getGraphicsAllocation());
}

commandStream.getGraphicsAllocation()->updateTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
commandStream.getGraphicsAllocation()->updateResidencyTaskCount(csr->peekTaskCount() + 1, csr->getOsContext().getContextId());
@@ -74,9 +74,13 @@ struct CommandQueueHw : public CommandQueueImp {

NEO::StreamProperties cmdListBeginState{};
uint64_t scratchGsba = 0;
uint64_t childGpuAddressPositionBeforeDynamicPreamble = 0;

size_t spaceForResidency = 10;
CommandList *firstCommandList = nullptr;
CommandList *lastCommandList = nullptr;
void *currentPatchForChainedBbStart = nullptr;

NEO::PreemptionMode preemptionMode{};
NEO::PreemptionMode statePreemption{};
uint32_t perThreadScratchSpaceSize = 0;
@@ -125,6 +129,7 @@ struct CommandQueueHw : public CommandQueueImp {
MOCKABLE_VIRTUAL bool isDispatchTaskCountPostSyncRequired(ze_fence_handle_t hFence, bool containsAnyRegularCmdList) const;
inline size_t estimateLinearStreamSizeInitial(CommandListExecutionContext &ctx);
inline size_t estimateCommandListSecondaryStart(CommandList *commandList);
inline size_t estimateCommandListPrimaryStart(bool required);
inline size_t estimateCommandListResidencySize(CommandList *commandList);
inline void setFrontEndStateProperties(CommandListExecutionContext &ctx);
inline void handleScratchSpaceAndUpdateGSBAStateDirtyFlag(CommandListExecutionContext &ctx);
@@ -157,8 +162,12 @@ struct CommandQueueHw : public CommandQueueImp {
inline void programOneCmdListFrontEndIfDirty(CommandListExecutionContext &ctx,
NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequiredState);
inline void programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &commandStream);
inline void programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
inline void programOneCmdListBatchBufferStartPrimaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
inline void programOneCmdListBatchBufferStartSecondaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx);
inline void programLastCommandListReturnBbStart(
NEO::LinearStream &commandStream,
CommandListExecutionContext &ctx);
inline void mergeOneCmdListPipelinedState(CommandList *commandList);
inline void programFrontEndAndClearDirtyFlag(bool shouldFrontEndBeProgrammed,
CommandListExecutionContext &ctx,
@@ -13,6 +13,7 @@
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
#include "shared/source/command_stream/preemption_mode.h"
#include "shared/source/command_stream/scratch_space_controller.h"
#include "shared/source/command_stream/wait_status.h"
#include "shared/source/debugger/debugger_l0.h"
@@ -78,13 +79,14 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
auto neoDevice = device->getNEODevice();
auto ctx = CommandListExecutionContext{phCommandLists,
numCommandLists,
csr->getPreemptionMode(),
this->isCopyOnlyCommandQueue ? NEO::PreemptionMode::Disabled : csr->getPreemptionMode(),
device,
NEO::Debugger::isDebugEnabled(internalUsage),
csr->isProgramActivePartitionConfigRequired(),
performMigration};
ctx.globalInit |= ctx.isDebugEnabled && !this->commandQueueDebugCmdsProgrammed && (neoDevice->getSourceLevelDebugger() || device->getL0Debugger());

this->startingCmdBuffer = &this->commandStream;
this->device->activateMetricGroups();

if (this->isCopyOnlyCommandQueue) {
@@ -185,6 +187,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(commandListHandles[i]);

ctx.childGpuAddressPositionBeforeDynamicPreamble = child.getCurrentGpuAddressPosition();

if (this->stateChanges.size() > this->currentStateChangeIndex) {
auto &stateChange = this->stateChanges[this->currentStateChangeIndex];
if (stateChange.cmdListIndex == i) {
@@ -214,6 +218,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
this->updateBaseAddressState(ctx.lastCommandList);
this->migrateSharedAllocationsIfRequested(ctx.isMigrationRequested, ctx.firstCommandList);

this->programLastCommandListReturnBbStart(child, ctx);
this->programStateSipEndWA(ctx.stateSipRequired, child);
this->assignCsrTaskCountToFenceIfAvailable(hFence);
this->dispatchTaskCountPostSyncRegular(ctx.isDispatchTaskCountPostSyncRequired, child);
@@ -250,6 +255,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(
ctx.spaceForResidency += estimateCommandListResidencySize(commandList);
}

linearStreamSizeEstimate += this->estimateCommandListPrimaryStart(ctx.globalInit);

this->csr->getResidencyAllocations().reserve(ctx.spaceForResidency);

NEO::EncodeDummyBlitWaArgs waArgs{false, &(this->device->getNEODevice()->getRootDeviceEnvironmentRef())};
@@ -270,7 +277,9 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(

for (auto i = 0u; i < numCommandLists; ++i) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
this->programOneCmdListBatchBufferStart(commandList, child);
ctx.childGpuAddressPositionBeforeDynamicPreamble = child.getCurrentGpuAddressPosition();

this->programOneCmdListBatchBufferStart(commandList, child, ctx);
this->mergeOneCmdListPipelinedState(commandList);
this->prefetchMemoryToDeviceAssociatedWithCmdList(commandList);
}
@@ -278,6 +287,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsCopyOnly(

this->assignCsrTaskCountToFenceIfAvailable(hFence);

this->programLastCommandListReturnBbStart(child, ctx);
this->dispatchTaskCountPostSyncByMiFlushDw(ctx.isDispatchTaskCountPostSyncRequired, child);

this->makeCsrTagAllocationResident();
@@ -559,7 +569,7 @@ void CommandQueueHw<gfxCoreFamily>::setupCmdListsAndContextParams(
uint32_t numCommandLists,
ze_fence_handle_t hFence) {

ctx.containsAnyRegularCmdList |= ctx.firstCommandList->getCmdListType() == CommandList::CommandListType::TYPE_REGULAR;
ctx.containsAnyRegularCmdList = ctx.firstCommandList->getCmdListType() == CommandList::CommandListType::TYPE_REGULAR;

for (auto i = 0u; i < numCommandLists; i++) {
auto commandList = CommandList::fromHandle(phCommandLists[i]);
@@ -625,7 +635,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(
auto hwContextSizeEstimate = this->csr->getCmdsSizeForHardwareContext();
if (hwContextSizeEstimate > 0) {
linearStreamSizeEstimate += hwContextSizeEstimate;
ctx.globalInit |= true;
ctx.globalInit = true;
}

if (ctx.isDirectSubmissionEnabled) {
@@ -644,7 +654,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(

if (NEO::DebugManager.flags.EnableSWTags.get()) {
linearStreamSizeEstimate += NEO::SWTagsManager::estimateSpaceForSWTags<GfxFamily>();
ctx.globalInit |= true;
ctx.globalInit = true;
}

linearStreamSizeEstimate += NEO::EncodeKernelArgsBuffer<GfxFamily>::getKernelArgsBufferCmdsSize(this->csr->getKernelArgsBufferAllocation(),
@@ -669,8 +679,18 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeInitial(

template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListSecondaryStart(CommandList *commandList) {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
return (commandList->getCmdContainer().getCmdBufferAllocations().size() * NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize());
if (!this->dispatchCmdListBatchBufferAsPrimary) {
return (commandList->getCmdContainer().getCmdBufferAllocations().size() * NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize());
}
return 0;
}

template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateCommandListPrimaryStart(bool required) {
if (this->dispatchCmdListBatchBufferAsPrimary && required) {
return NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize();
}
return 0;
}

template <GFXCORE_FAMILY gfxCoreFamily>
@@ -758,6 +778,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
if (propertyScmDirty || propertyFeDirty || propertyPsDirty || propertySbaDirty || frontEndReturnPoint || propertyPreemptionDirty) {
CommandListDirtyFlags dirtyFlags = {propertyScmDirty, propertyFeDirty, propertyPsDirty, propertySbaDirty, frontEndReturnPoint, propertyPreemptionDirty};
this->stateChanges.emplace_back(stagingState, cmdList, dirtyFlags, ctx.statePreemption, i);
linearStreamSizeEstimate += this->estimateCommandListPrimaryStart(true);
}
}

@@ -770,7 +791,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
linearStreamSizeEstimate += csrHw->getCmdSizeForPerDssBackedBuffer(this->device->getHwInfo());

ctx.globalInit |= true;
ctx.globalInit = true;
}

NEO::Device *neoDevice = this->device->getNEODevice();
@@ -781,6 +802,10 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
linearStreamSizeEstimate += NEO::PreemptionHelper::getRequiredStateSipCmdSize<GfxFamily>(*neoDevice, this->csr->isRcs());
}

bool firstCmdlistDynamicPreamble = (this->stateChanges.size() > 0 && this->stateChanges[0].cmdListIndex == 0);
bool estimateBbStartForGlobalInitOnly = !firstCmdlistDynamicPreamble && ctx.globalInit;
linearStreamSizeEstimate += this->estimateCommandListPrimaryStart(estimateBbStartForGlobalInitOnly);

return linearStreamSizeEstimate;
}
@@ -983,18 +1008,63 @@ void CommandQueueHw<gfxCoreFamily>::writeCsrStreamInlineIfLogicalStateHelperAvai
}

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &cmdStream) {
CommandListExecutionContext ctx = {};
programOneCmdListBatchBufferStart(commandList, cmdStream, ctx);
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx) {
if (this->dispatchCmdListBatchBufferAsPrimary) {
programOneCmdListBatchBufferStartPrimaryBatchBuffer(commandList, commandStream, ctx);
} else {
programOneCmdListBatchBufferStartSecondaryBatchBuffer(commandList, commandStream, ctx);
}
}

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &cmdStream, CommandListExecutionContext &ctx) {
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStartPrimaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx) {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;

NEO::CommandContainer &cmdListContainer = commandList->getCmdContainer();
NEO::GraphicsAllocation *cmdListFirstCmdBuffer = cmdListContainer.getCmdBufferAllocations()[0];
auto bbStartPatchLocation = reinterpret_cast<MI_BATCH_BUFFER_START *>(ctx.currentPatchForChainedBbStart);

bool dynamicPreamble = ctx.childGpuAddressPositionBeforeDynamicPreamble != commandStream.getCurrentGpuAddressPosition();
if (ctx.globalInit || dynamicPreamble) {
if (ctx.currentPatchForChainedBbStart) {
// dynamic preamble, 2nd or later command list
// jump from previous command list to the position before dynamic preamble
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(
bbStartPatchLocation,
ctx.childGpuAddressPositionBeforeDynamicPreamble,
false, false, false);
}
// dynamic preamble, jump from current position, after dynamic preamble to the current command list
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStream, cmdListFirstCmdBuffer->getGpuAddress(), false, false, false);

ctx.globalInit = false;
} else {
if (ctx.currentPatchForChainedBbStart == nullptr) {
// nothing to dispatch from queue, first command list will be used as submitting batch buffer to KMD or ULLS
size_t firstCmdBufferAlignedSize = cmdListContainer.getAlignedPrimarySize();
this->firstCmdListStream.replaceGraphicsAllocation(cmdListFirstCmdBuffer);
this->firstCmdListStream.replaceBuffer(cmdListFirstCmdBuffer->getUnderlyingBuffer(), firstCmdBufferAlignedSize);
this->firstCmdListStream.getSpace(firstCmdBufferAlignedSize);
this->startingCmdBuffer = &this->firstCmdListStream;
} else {
// chain between command lists when no dynamic preamble required between 2nd and next command list
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(
bbStartPatchLocation,
cmdListFirstCmdBuffer->getGpuAddress(),
false, false, false);
}
}

ctx.currentPatchForChainedBbStart = cmdListContainer.getEndCmdPtr();
}

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStartSecondaryBatchBuffer(CommandList *commandList, NEO::LinearStream &commandStream, CommandListExecutionContext &ctx) {
auto &commandContainer = commandList->getCmdContainer();

auto &cmdBufferAllocations = commandContainer.getCmdBufferAllocations();
auto cmdBufferCount = cmdBufferAllocations.size();
bool isCommandListImmediate = (commandList->getCmdListType() == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
bool isCommandListImmediate = !ctx.containsAnyRegularCmdList;

auto &returnPoints = commandList->getReturnPoints();
uint32_t returnPointsSize = commandList->getReturnPointsSize();
@@ -1006,7 +1076,7 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
if (isCommandListImmediate && (iter == (cmdBufferCount - 1))) {
startOffset = ptrOffset(allocation->getGpuAddress(), commandContainer.currentLinearStreamStartOffsetRef());
}
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&cmdStream, startOffset, true, false, false);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStream, startOffset, true, false, false);
if (returnPointsSize > 0) {
bool cmdBufferHasRestarts = std::find_if(
std::next(returnPoints.begin(), returnPointIdx),
@@ -1020,9 +1090,9 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
ctx.cmdListBeginState.frontEndState.copyPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(returnPoints[returnPointIdx].configSnapshot.frontEndState);
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
scratchSpaceController->getPerThreadScratchSpaceSize(),
cmdStream,
commandStream,
ctx.cmdListBeginState);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&cmdStream,
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&commandStream,
returnPoints[returnPointIdx].gpuAddress,
true, false, false);
returnPointIdx++;
@@ -1032,6 +1102,20 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
}
}

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programLastCommandListReturnBbStart(
NEO::LinearStream &commandStream,
CommandListExecutionContext &ctx) {
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
if (this->dispatchCmdListBatchBufferAsPrimary) {
auto finalReturnPosition = commandStream.getCurrentGpuAddressPosition();
auto bbStartCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(ctx.currentPatchForChainedBbStart);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(bbStartCmd,
finalReturnPosition,
false, false, false);
}
}

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::mergeOneCmdListPipelinedState(CommandList *commandList) {

@@ -1166,8 +1250,11 @@ NEO::SubmissionStatus CommandQueueHw<gfxCoreFamily>::prepareAndSubmitBatchBuffer
void *paddingPtr = innerCommandStream.getSpace(this->alignedChildStreamPadding);
memset(paddingPtr, 0, this->alignedChildStreamPadding);
}
size_t startOffset = (this->startingCmdBuffer == &this->firstCmdListStream)
? 0
: ptrDiff(innerCommandStream.getCpuBase(), outerCommandStream.getCpuBase());

return submitBatchBuffer(ptrDiff(innerCommandStream.getCpuBase(), outerCommandStream.getCpuBase()),
return submitBatchBuffer(startOffset,
csr->getResidencyAllocations(),
endingCmd,
ctx.anyCommandListWithCooperativeKernels);
@@ -129,6 +129,7 @@ struct CommandQueueImp : public CommandQueue {

CommandBufferManager buffers;
NEO::LinearStream commandStream{};
NEO::LinearStream firstCmdListStream{};
NEO::HeapContainer heapContainer;
ze_command_queue_desc_t desc;
std::vector<Kernel *> printfKernelContainer;
@@ -138,6 +139,7 @@ struct CommandQueueImp : public CommandQueue {

Device *device = nullptr;
NEO::CommandStreamReceiver *csr = nullptr;
NEO::LinearStream *startingCmdBuffer = nullptr;

uint32_t currentStateChangeIndex = 0;

@@ -187,7 +187,6 @@ void CommandQueueHw<gfxCoreFamily>::patchCommands(CommandList &commandList, uint
for (auto &commandToPatch : commandsToPatch) {
switch (commandToPatch.type) {
case CommandList::CommandToPatch::FrontEndState: {
UNRECOVERABLE_IF(scratchAddress == 0u);
uint32_t lowScratchAddress = uint32_t(0xFFFFFFFF & scratchAddress);
CFE_STATE *cfeStateCmd = nullptr;
cfeStateCmd = reinterpret_cast<CFE_STATE *>(commandToPatch.pCommand);

@@ -334,7 +334,7 @@ void CommandListAppendLaunchRayTracingKernelFixture::tearDown() {
}

void PrimaryBatchBufferCmdListFixture::setUp() {
NEO::DebugManager.flags.DispatchCmdlistCmdBufferPrimary.set(1);
DebugManager.flags.DispatchCmdlistCmdBufferPrimary.set(1);

ModuleMutableCommandListFixture::setUp();
}
@@ -30,6 +30,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::device;
using BaseClass::preemptionCmdSyncProgramming;
using BaseClass::printfKernelContainer;
using BaseClass::startingCmdBuffer;
using BaseClass::submitBatchBuffer;
using BaseClass::synchronizeByPollingForTaskCount;
using BaseClass::taskCount;
@@ -71,6 +72,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass::commandStream;
using BaseClass::prepareAndSubmitBatchBuffer;
using BaseClass::printfKernelContainer;
using BaseClass::startingCmdBuffer;
using L0::CommandQueue::activeSubDevices;
using L0::CommandQueue::cmdListHeapAddressModel;
using L0::CommandQueue::dispatchCmdListBatchBufferAsPrimary;
@@ -106,6 +108,9 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
if (submitBatchBufferReturnValue.has_value()) {
return *submitBatchBufferReturnValue;
}
if (this->startingCmdBuffer == nullptr) {
this->startingCmdBuffer = &this->commandStream;
}
return BaseClass::submitBatchBuffer(offset, residencyContainer, endingCmdPtr, isCooperative);
}
@@ -14,6 +14,7 @@
#include "shared/test/common/test_macros/hw_test.h"

#include "level_zero/core/source/builtin/builtin_functions_lib.h"
#include "level_zero/core/source/device/device.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
@@ -1530,6 +1531,113 @@ HWTEST_F(PrimaryBatchBufferCmdListTest, givenPrimaryBatchBufferWhenCommandListHa
EXPECT_EQ(expectedEndPtr, cmdContainer.getEndCmdPtr());
}

HWTEST_F(PrimaryBatchBufferCmdListTest, givenPrimaryBatchBufferWhenCopyCommandListAndQueueAreCreatedThenFirstDispatchCreatesGlobalInitPreambleAndLaterDispatchProvideCmdListBuffer) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

ze_result_t returnValue;
uint32_t count = 0u;
returnValue = device->getCommandQueueGroupProperties(&count, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_GT(count, 0u);

std::vector<ze_command_queue_group_properties_t> properties(count);
returnValue = device->getCommandQueueGroupProperties(&count, properties.data());
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

uint32_t ordinal = 0u;
for (ordinal = 0u; ordinal < count; ordinal++) {
if ((properties[ordinal].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COPY) &&
!(properties[ordinal].flags & ZE_COMMAND_QUEUE_GROUP_PROPERTY_FLAG_COMPUTE)) {
if (properties[ordinal].numQueues == 0) {
continue;
}
break;
}
}

if (ordinal == count) {
GTEST_SKIP();
}

void *dstPtr = nullptr;
void *srcPtr = nullptr;
const size_t size = 64;
ze_device_mem_alloc_desc_t deviceDesc = {};
returnValue = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, 4u, &dstPtr);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

returnValue = context->allocDeviceMem(device->toHandle(), &deviceDesc, size, 4u, &srcPtr);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

ze_command_queue_desc_t desc{};
desc.ordinal = ordinal;
desc.index = 0u;

ze_command_queue_handle_t commandQueueHandle;
returnValue = device->createCommandQueue(&desc, &commandQueueHandle);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
auto commandQueueCopy = static_cast<L0::ult::CommandQueue *>(L0::CommandQueue::fromHandle(commandQueueHandle));
ASSERT_NE(commandQueueCopy, nullptr);

auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueueCopy->getCsr());
ultCsr->recordFlusheBatchBuffer = true;
std::unique_ptr<L0::ult::CommandList> commandListCopy;
commandListCopy.reset(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::Copy, 0u, returnValue)));
ASSERT_EQ(ZE_RESULT_SUCCESS, returnValue);

auto &cmdContainerCopy = commandListCopy->getCmdContainer();
auto &cmdListStream = *cmdContainerCopy.getCommandStream();
auto firstCmdBufferAllocation = cmdContainerCopy.getCmdBufferAllocations()[0];

returnValue = commandListCopy->appendMemoryCopy(dstPtr, srcPtr, size, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

size_t firstCmdBufferUsed = cmdListStream.getUsed();
auto bbStartSpace = ptrOffset(cmdListStream.getCpuBase(), firstCmdBufferUsed);

returnValue = commandListCopy->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

EXPECT_EQ(bbStartSpace, cmdContainerCopy.getEndCmdPtr());
size_t expectedAlignedUse = alignUp(firstCmdBufferUsed + sizeof(MI_BATCH_BUFFER_START), NEO::CommandContainer::minCmdBufferPtrAlign);
EXPECT_EQ(expectedAlignedUse, cmdContainerCopy.getAlignedPrimarySize());

size_t blitterContextInitSize = ultCsr->getCmdsSizeForHardwareContext();

auto cmdListHandle = commandListCopy->toHandle();
returnValue = commandQueueCopy->executeCommandLists(1, &cmdListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(bbStartSpace);
ASSERT_NE(nullptr, bbStartCmd);

auto &cmdQueueStream = commandQueueCopy->commandStream;
if (blitterContextInitSize > 0) {
EXPECT_EQ(cmdQueueStream.getGraphicsAllocation(), ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);
} else {
EXPECT_EQ(firstCmdBufferAllocation, ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);
EXPECT_EQ(cmdQueueStream.getGpuBase(), bbStartCmd->getBatchBufferStartAddress());
}
size_t queueSizeUsed = cmdQueueStream.getUsed();

returnValue = commandQueueCopy->executeCommandLists(1, &cmdListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(bbStartSpace);
ASSERT_NE(nullptr, bbStartCmd);

EXPECT_EQ(cmdQueueStream.getGpuBase() + queueSizeUsed, bbStartCmd->getBatchBufferStartAddress());

commandQueueCopy->destroy();
commandListCopy.reset(nullptr);

returnValue = context->freeMem(dstPtr);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
returnValue = context->freeMem(srcPtr);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}

using PrimaryBatchBufferPreamblelessCmdListTest = Test<PrimaryBatchBufferPreamblelessCmdListFixture>;

HWTEST2_F(PrimaryBatchBufferPreamblelessCmdListTest,
@@ -1592,5 +1700,213 @@ HWTEST2_F(PrimaryBatchBufferPreamblelessCmdListTest,
EXPECT_EQ((uncachedMocs << 1), sbaCmd->getStatelessDataPortAccessMemoryObjectControlState());
}

HWTEST2_F(PrimaryBatchBufferPreamblelessCmdListTest,
givenPrimaryBatchBufferWhenExecutingCommandWithoutPreambleThenUseCommandListBufferAsStartingBuffer,
IsAtLeastXeHpCore) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getCsr());
ultCsr->recordFlusheBatchBuffer = true;

ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto commandListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto &cmdQueueStream = commandQueue->commandStream;
EXPECT_EQ(cmdQueueStream.getGraphicsAllocation(), ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);

size_t queueUsedSize = cmdQueueStream.getUsed();
auto gpuReturnAddress = cmdQueueStream.getGpuBase() + queueUsedSize;

result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto &cmdContainer = commandList->getCmdContainer();
auto firstCmdBufferAllocation = cmdContainer.getCmdBufferAllocations()[0];
EXPECT_EQ(firstCmdBufferAllocation, ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);

auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(cmdContainer.getEndCmdPtr());
ASSERT_NE(nullptr, bbStartCmd);
EXPECT_EQ(gpuReturnAddress, bbStartCmd->getBatchBufferStartAddress());
}
HWTEST2_F(PrimaryBatchBufferPreamblelessCmdListTest,
givenPrimaryBatchBufferWhenExecutingMultipleCommandListsAndEachWithoutPreambleThenUseCommandListBufferAsStartingBufferAndChainAllCommandLists,
IsAtLeastXeHpCore) {

using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getCsr());
ultCsr->recordFlusheBatchBuffer = true;

ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

ze_command_list_handle_t commandLists[] = {commandList->toHandle(),
commandList2->toHandle(),
commandList3->toHandle()};

result = commandQueue->executeCommandLists(1, commandLists, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto &cmdQueueStream = commandQueue->commandStream;
EXPECT_EQ(cmdQueueStream.getGraphicsAllocation(), ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);

size_t queueUsedSize = cmdQueueStream.getUsed();
auto gpuReturnAddress = cmdQueueStream.getGpuBase() + queueUsedSize;

result = commandList2->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList2->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList3->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList3->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandQueue->executeCommandLists(3, commandLists, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdQueueStream.getCpuBase(), queueUsedSize),
cmdQueueStream.getUsed() - queueUsedSize));
auto cmdQueueBbStartCmds = findAll<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, cmdQueueBbStartCmds.size());

auto &cmdContainer1stCmdList = commandList->getCmdContainer();
auto dispatchCmdBufferAllocation = cmdContainer1stCmdList.getCmdBufferAllocations()[0];
EXPECT_EQ(dispatchCmdBufferAllocation, ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);

auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(cmdContainer1stCmdList.getEndCmdPtr());
ASSERT_NE(nullptr, bbStartCmd);

auto &cmdContainer2ndCmdList = commandList2->getCmdContainer();
auto secondCmdBufferAllocation = cmdContainer2ndCmdList.getCmdBufferAllocations()[0];
EXPECT_EQ(secondCmdBufferAllocation->getGpuAddress(), bbStartCmd->getBatchBufferStartAddress());

bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(cmdContainer2ndCmdList.getEndCmdPtr());
ASSERT_NE(nullptr, bbStartCmd);

auto &cmdContainer3rdCmdList = commandList3->getCmdContainer();
auto thirdCmdBufferAllocation = cmdContainer3rdCmdList.getCmdBufferAllocations()[0];
EXPECT_EQ(thirdCmdBufferAllocation->getGpuAddress(), bbStartCmd->getBatchBufferStartAddress());

bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(cmdContainer3rdCmdList.getEndCmdPtr());
ASSERT_NE(nullptr, bbStartCmd);
EXPECT_EQ(gpuReturnAddress, bbStartCmd->getBatchBufferStartAddress());
}
HWTEST2_F(PrimaryBatchBufferPreamblelessCmdListTest,
givenPrimaryBatchBufferWhenExecutingMultipleCommandListsAndSecondWithPreambleThenUseCommandListBufferAsStartingBufferAndChainFirstListToQueuePreambleAndAfterToSecondList,
IsAtLeastXeHpCore) {
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;

auto ultCsr = static_cast<UltCommandStreamReceiver<FamilyType> *>(commandQueue->getCsr());
ultCsr->recordFlusheBatchBuffer = true;

ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

ze_command_list_handle_t commandLists[] = {commandList->toHandle(),
commandList2->toHandle(),
commandList3->toHandle()};

result = commandQueue->executeCommandLists(1, commandLists, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto &cmdQueueStream = commandQueue->commandStream;
EXPECT_EQ(cmdQueueStream.getGraphicsAllocation(), ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);

size_t queueUsedSize = cmdQueueStream.getUsed();
auto gpuReturnAddress = cmdQueueStream.getGpuBase() + queueUsedSize;

kernel->kernelRequiresUncachedMocsCount++;

result = commandList2->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList2->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList3->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandList3->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

result = commandQueue->executeCommandLists(3, commandLists, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

// 1st command list is preambleless
auto &cmdContainer1stCmdList = commandList->getCmdContainer();
auto dispatchCmdBufferAllocation = cmdContainer1stCmdList.getCmdBufferAllocations()[0];
EXPECT_EQ(dispatchCmdBufferAllocation, ultCsr->latestFlushedBatchBuffer.commandBufferAllocation);

auto bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(cmdContainer1stCmdList.getEndCmdPtr());
ASSERT_NE(nullptr, bbStartCmd);

// ending BB_START of 1st command list points to dynamic preamble - dirty stateless mocs SBA command
EXPECT_EQ(gpuReturnAddress, bbStartCmd->getBatchBufferStartAddress());

GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdQueueStream.getCpuBase(), queueUsedSize),
cmdQueueStream.getUsed() - queueUsedSize));
auto cmdQueueSbaDirtyCmds = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
ASSERT_TRUE(cmdQueueSbaDirtyCmds.size() >= 1u);

auto cmdQueueBbStartCmds = findAll<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, cmdQueueBbStartCmds.size());

auto chainFromPreambleToSecondBbStartCmd = reinterpret_cast<MI_BATCH_BUFFER_START *>(*cmdQueueBbStartCmds[0]);

auto &cmdContainer2ndCmdList = commandList2->getCmdContainer();
auto secondCmdBufferAllocation = cmdContainer2ndCmdList.getCmdBufferAllocations()[0];
EXPECT_EQ(secondCmdBufferAllocation->getGpuAddress(), chainFromPreambleToSecondBbStartCmd->getBatchBufferStartAddress());

bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(cmdContainer2ndCmdList.getEndCmdPtr());
ASSERT_NE(nullptr, bbStartCmd);

auto &cmdContainer3rdCmdList = commandList3->getCmdContainer();
auto thirdCmdBufferAllocation = cmdContainer3rdCmdList.getCmdBufferAllocations()[0];
EXPECT_EQ(thirdCmdBufferAllocation->getGpuAddress(), bbStartCmd->getBatchBufferStartAddress());

bbStartCmd = genCmdCast<MI_BATCH_BUFFER_START *>(cmdContainer3rdCmdList.getEndCmdPtr());
ASSERT_NE(nullptr, bbStartCmd);

size_t sbaSize = sizeof(STATE_BASE_ADDRESS) + NEO::MemorySynchronizationCommands<FamilyType>::getSizeForSingleBarrier(false);
if (commandQueue->doubleSbaWa) {
sbaSize += sizeof(STATE_BASE_ADDRESS);
}

gpuReturnAddress += sizeof(MI_BATCH_BUFFER_START) + sbaSize;
EXPECT_EQ(gpuReturnAddress, bbStartCmd->getBatchBufferStartAddress());
}

} // namespace ult
} // namespace L0
@@ -1534,15 +1534,19 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,

ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
ze_result_t result;
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto &commandsToPatch = commandList->commandsToPatch;
EXPECT_EQ(0u, commandsToPatch.size());

mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x40;
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

size_t usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

if (fePropertiesSupport.disableEuFusion) {
ASSERT_EQ(1u, commandsToPatch.size());
@@ -1555,11 +1559,13 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(*cfeCmd));
EXPECT_EQ(0u, cfeCmd->getScratchSpaceBuffer());
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}

commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

if (fePropertiesSupport.disableEuFusion) {
EXPECT_EQ(1u, commandsToPatch.size());
@@ -1570,7 +1576,8 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0;

usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

if (fePropertiesSupport.disableEuFusion) {
ASSERT_EQ(2u, commandsToPatch.size());
@@ -1583,12 +1590,14 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(*cfeCmd));
EXPECT_EQ(0u, cfeCmd->getScratchSpaceBuffer());
}

mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

if (fePropertiesSupport.disableEuFusion) {
ASSERT_EQ(3u, commandsToPatch.size());
@@ -1601,15 +1610,40 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(*cfeCmd));
EXPECT_EQ(0u, cfeCmd->getScratchSpaceBuffer());
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}

result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto commandListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

if (fePropertiesSupport.disableEuFusion) {
commandList->reset();
ASSERT_EQ(3u, commandsToPatch.size());

bool disableFusionStates[] = {true, false, true};
uint32_t disableFusionStatesIdx = 0;

for (const auto &cfeToPatch : commandsToPatch) {
EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfeToPatch.type);
auto cfeCmd = genCmdCast<CFE_STATE *>(cfeToPatch.pDestination);
ASSERT_NE(nullptr, cfeCmd);

EXPECT_EQ(disableFusionStates[disableFusionStatesIdx++],
NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(*cfeCmd));
EXPECT_NE(0u, cfeCmd->getScratchSpaceBuffer());
}

result = commandList->reset();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, commandsToPatch.size());
}
}

HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
givenFrontEndTrackingCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndAddedToPatchlist,
IsAtLeastXeHpCore) {
@@ -1619,6 +1653,8 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
auto &productHelper = device->getProductHelper();
productHelper.fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());

mockKernelImmData->kernelDescriptor->kernelAttributes.perThreadScratchSize[0] = 0x40;

NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);

EXPECT_TRUE(commandList->frontEndStateTracking);
@@ -1644,6 +1680,7 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd));
EXPECT_EQ(0u, cfeCmd->getScratchSpaceBuffer());
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}
@@ -1668,6 +1705,7 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd));
EXPECT_EQ(0u, cfeCmd->getScratchSpaceBuffer());
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}
@@ -1683,12 +1721,36 @@ HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd));
EXPECT_EQ(0u, cfeCmd->getScratchSpaceBuffer());
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}

result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

auto commandListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, true);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);

if (fePropertiesSupport.computeDispatchAllWalker) {
commandList->reset();
ASSERT_EQ(3u, commandsToPatch.size());

bool computeDispatchAllWalkerStates[] = {true, false, true};
uint32_t computeDispatchAllWalkerStatesIdx = 0;

for (const auto &cfeToPatch : commandsToPatch) {
EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfeToPatch.type);
auto cfeCmd = genCmdCast<CFE_STATE *>(cfeToPatch.pDestination);
ASSERT_NE(nullptr, cfeCmd);

EXPECT_EQ(computeDispatchAllWalkerStates[computeDispatchAllWalkerStatesIdx++],
NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd));
EXPECT_NE(0u, cfeCmd->getScratchSpaceBuffer());
}

result = commandList->reset();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(0u, commandsToPatch.size());
}
}
@@ -650,6 +650,7 @@ HWTEST_F(CommandQueueCreate, givenContainerWithAllocationsWhenResidencyContainer
false,
false,
returnValue));
commandQueue->startingCmdBuffer = &commandQueue->commandStream;
ResidencyContainer container;
TaskCountType peekTaskCountBefore = commandQueue->csr->peekTaskCount();
TaskCountType flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
@@ -676,6 +677,7 @@ HWTEST_F(CommandQueueCreate, givenCommandStreamReceiverFailsThenSubmitBatchBuffe
false,
false,
returnValue));
commandQueue->startingCmdBuffer = &commandQueue->commandStream;
ResidencyContainer container;
TaskCountType peekTaskCountBefore = commandQueue->csr->peekTaskCount();
TaskCountType flushedTaskCountBefore = commandQueue->csr->peekLatestFlushedTaskCount();
@@ -701,6 +703,7 @@ HWTEST_F(CommandQueueCreate, givenOutOfMemoryThenSubmitBatchBufferReturnsOutOfMe
false,
false,
returnValue));
commandQueue->startingCmdBuffer = &commandQueue->commandStream;
ResidencyContainer container;
NEO::SubmissionStatus ret = commandQueue->submitBatchBuffer(0, container, nullptr, false);
EXPECT_EQ(ret, NEO::SubmissionStatus::OUT_OF_MEMORY);
@@ -1014,29 +1014,6 @@ HWTEST2_F(CommandQueueScratchTests, whenPatchCommandsIsCalledThenCommandsAreCorr
}
}

HWTEST2_F(CommandQueueScratchTests, givenInvalidScratchAddressWhenPatchCommandsIsCalledThenAbortIsThrown, IsAtLeastXeHpCore) {
using CFE_STATE = typename FamilyType::CFE_STATE;

ze_command_queue_desc_t desc = {};
NEO::CommandStreamReceiver *csr = nullptr;
device->getCsrForOrdinalAndIndex(&csr, 0u, 0u);
auto commandQueue = std::make_unique<MockCommandQueueHw<gfxCoreFamily>>(device, csr, &desc);
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();

CFE_STATE destinationCfeState;
auto sourceCfeState = new CFE_STATE;
*sourceCfeState = FamilyType::cmdInitCfeState;

CommandList::CommandToPatch commandToPatch;
commandToPatch.pDestination = &destinationCfeState;
commandToPatch.pCommand = sourceCfeState;
commandToPatch.type = CommandList::CommandToPatch::CommandType::FrontEndState;
commandList->commandsToPatch.push_back(commandToPatch);

uint64_t invalidScratchAddress = 0u;
EXPECT_ANY_THROW(commandQueue->patchCommands(*commandList, invalidScratchAddress));
}

using IsWithinNotSupported = IsWithinGfxCore<IGFX_GEN9_CORE, IGFX_GEN12LP_CORE>;

HWTEST2_F(CommandQueueScratchTests, givenCommandsToPatchToNotSupportedPlatformWhenPatchCommandsIsCalledThenAbortIsThrown, IsWithinNotSupported) {