Add multi return point command lists for front end reconfiguration

This change gives fine-grained control over front end configuration for each
kernel. It makes it possible to inject FE commands in the command queue and
return to the exact place in the command list. Programming these commands in
the queue removes the need to patch commands inside command lists, which is
a costly operation. It also allows context information to be programmed for
each command list.

Related-To: NEO-5019

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
Author: Zbigniew Zdanowicz
Date: 2022-09-02 12:24:47 +00:00
Committed by: Compute-Runtime-Automation
Parent: 7d164ec118
Commit: b6e2d2df8b
16 changed files with 1032 additions and 15 deletions
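
For orientation, the sketch below illustrates the mechanism with simplified
stand-in types (these are not the NEO classes that appear in the diffs; the
names and the 8-byte BB_END size are illustrative assumptions). While kernels
are appended, the command list ends its batch buffer at every front end state
change and records a return point; at execute time the command queue programs
the new front end state and jumps back to the recorded address, so nothing
inside the command list has to be patched.

// Illustrative sketch only, not part of this commit.
#include <cstdint>
#include <vector>

struct FrontEndState {
    bool disableEuFusion = false;
    bool computeDispatchAllWalker = false;
};

struct ReturnPoint {
    FrontEndState requiredState; // state needed from this point onwards
    uint64_t resumeOffset = 0;   // offset right after the injected MI_BATCH_BUFFER_END
};

struct CommandListSketch {
    std::vector<ReturnPoint> returnPoints;
    FrontEndState current{};
    uint64_t usedBytes = 0;

    void appendKernel(const FrontEndState &needed, uint64_t kernelCmdsSize) {
        if (needed.disableEuFusion != current.disableEuFusion ||
            needed.computeDispatchAllWalker != current.computeDispatchAllWalker) {
            usedBytes += 8; // MI_BATCH_BUFFER_END splits the buffer here
            returnPoints.push_back({needed, usedBytes});
            current = needed;
        }
        usedBytes += kernelCmdsSize;
    }
};

struct CommandQueueSketch {
    void execute(const CommandListSketch &list, uint64_t listBaseGpuAddress) {
        programFrontEnd(FrontEndState{});          // initial front end configuration
        jumpTo(listBaseGpuAddress);                // BB_START into the command list
        for (const auto &rp : list.returnPoints) { // one FE + BB_START per return point
            programFrontEnd(rp.requiredState);
            jumpTo(listBaseGpuAddress + rp.resumeOffset);
        }
    }
    void programFrontEnd(const FrontEndState &) { /* would emit the VFE/CFE state command */ }
    void jumpTo(uint64_t) { /* would emit MI_BATCH_BUFFER_START */ }
};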


@@ -35,12 +35,18 @@ struct CmdListKernelLaunchParams {
bool isDestinationAllocationInSystemMemory = false;
};
struct CmdListReturnPoint {
NEO::StreamProperties configSnapshot;
uint64_t gpuAddress = 0;
NEO::GraphicsAllocation *currentCmdBuffer = nullptr;
};
struct CommandList : _ze_command_list_handle_t {
static constexpr uint32_t defaultNumIddsPerBlock = 64u;
static constexpr uint32_t commandListimmediateIddsPerBlock = 1u;
CommandList() = delete;
CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {}
CommandList(uint32_t numIddsPerBlock);
template <typename Type>
struct Allocator {
@@ -261,6 +267,14 @@ struct CommandList : _ze_command_list_handle_t {
return commandsToPatch;
}
std::vector<CmdListReturnPoint> &getReturnPoints() {
return returnPoints;
}
uint32_t getReturnPointsSize() const {
return static_cast<uint32_t>(returnPoints.size());
}
void makeResidentAndMigrate(bool);
void migrateSharedAllocations();
@@ -287,6 +301,7 @@ struct CommandList : _ze_command_list_handle_t {
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
std::vector<NEO::GraphicsAllocation *> patternAllocations;
std::vector<CmdListReturnPoint> returnPoints;
NEO::StreamProperties requiredStreamState{};
NEO::StreamProperties finalStreamState{};
@@ -301,6 +316,7 @@ struct CommandList : _ze_command_list_handle_t {
bool containsCooperativeKernelsFlag = false;
bool containsStatelessUncachedResource = false;
bool performMemoryPrefetch = false;
bool multiReturnPointCommandList = false;
};
using CommandListAllocatorFn = CommandList *(*)(uint32_t);


@@ -112,6 +112,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
}
this->ownedPrivateAllocations.clear();
cmdListCurrentStartOffset = 0;
this->returnPoints.clear();
return ZE_RESULT_SUCCESS;
}
@@ -122,6 +123,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->commandListPreemptionMode = device->getDevicePreemptionMode();
this->engineGroupType = engineGroupType;
this->flags = flags;
if (this->multiReturnPointCommandList) {
this->returnPoints.reserve(32);
}
if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
@@ -2323,11 +2327,25 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
auto logicalStateHelperBlock = !getLogicalStateHelper();
if (finalStreamState.frontEndState.isDirty() && isPatchingVfeStateAllowed && logicalStateHelperBlock) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
if (isPatchingVfeStateAllowed) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
}
if (this->multiReturnPointCommandList) {
auto &stream = *commandContainer.getCommandStream();
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferEnd(stream);
CmdListReturnPoint returnPoint = {
{},
stream.getGpuBase() + stream.getUsed(),
stream.getGraphicsAllocation()};
returnPoint.configSnapshot.frontEndState.setProperties(finalStreamState.frontEndState);
returnPoints.push_back(returnPoint);
}
}
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);


@@ -25,6 +25,12 @@
namespace L0 {
CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
if (NEO::DebugManager.flags.MultiReturnPointCommandList.get() != -1) {
multiReturnPointCommandList = !!NEO::DebugManager.flags.MultiReturnPointCommandList.get();
}
}
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};
CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {};


@@ -38,6 +38,11 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
if (overrideUseKmdWaitFunction != -1) {
useKmdWaitFunction = !!(overrideUseKmdWaitFunction);
}
int overrideMultiReturnPointCommandList = NEO::DebugManager.flags.MultiReturnPointCommandList.get();
if (overrideMultiReturnPointCommandList != -1) {
multiReturnPointCommandList = !!(overrideMultiReturnPointCommandList);
}
}
ze_result_t CommandQueueImp::destroy() {


@@ -60,6 +60,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
bool commandQueueDebugCmdsProgrammed = false;
bool isCopyOnlyCommandQueue = false;
bool internalUsage = false;
bool multiReturnPointCommandList = false;
};
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,


@@ -44,6 +44,7 @@
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/tools/source/metrics/metric.h"
#include <algorithm>
#include <limits>
#include <thread>
@@ -281,15 +282,21 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListFrontEndIfDirty(
auto &streamProperties = this->csr->getStreamProperties();
bool shouldProgramVfe = ctx.frontEndStateDirty;
if (isPatchingVfeStateAllowed) {
ctx.cmdListBeginState.frontEndState = {};
if (isPatchingVfeStateAllowed || this->multiReturnPointCommandList) {
auto &requiredStreamState = commandList->getRequiredStreamState();
streamProperties.frontEndState.setProperties(requiredStreamState.frontEndState);
streamProperties.frontEndState.setPropertySingleSliceDispatchCcsMode(ctx.engineInstanced, device->getHwInfo());
shouldProgramVfe |= streamProperties.frontEndState.isDirty();
}
ctx.cmdListBeginState.frontEndState.setProperties(streamProperties.frontEndState);
this->programFrontEndAndClearDirtyFlag(shouldProgramVfe, ctx, cmdStream);
if (isPatchingVfeStateAllowed) {
if (isPatchingVfeStateAllowed || this->multiReturnPointCommandList) {
auto &finalStreamState = commandList->getFinalStreamState();
streamProperties.frontEndState.setProperties(finalStreamState.frontEndState);
}
@@ -341,7 +348,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandL
auto singleFrontEndCmdSize = estimateFrontEndCmdSize();
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (!isPatchingVfeStateAllowed) {
if (!isPatchingVfeStateAllowed && !this->multiReturnPointCommandList) {
return isFrontEndStateDirty * singleFrontEndCmdSize;
}
@@ -352,11 +359,16 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandL
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto &requiredStreamState = commandList->getRequiredStreamState();
streamPropertiesCopy.frontEndState.setProperties(requiredStreamState.frontEndState);
streamPropertiesCopy.frontEndState.setPropertySingleSliceDispatchCcsMode(engineInstanced, device->getHwInfo());
if (isFrontEndStateDirty || streamPropertiesCopy.frontEndState.isDirty()) {
estimatedSize += singleFrontEndCmdSize;
isFrontEndStateDirty = false;
}
if (this->multiReturnPointCommandList) {
uint32_t frontEndChanges = commandList->getReturnPointsSize();
estimatedSize += (frontEndChanges * singleFrontEndCmdSize);
estimatedSize += (frontEndChanges * NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize());
}
auto &finalStreamState = commandList->getFinalStreamState();
streamPropertiesCopy.frontEndState.setProperties(finalStreamState.frontEndState);
}
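
As a side note on the estimation above: per command list, the queue reserves
one front end command when the state is dirty on entry, plus one front end
command and one batch buffer start per recorded return point. A minimal
arithmetic sketch, with made-up command sizes (the real sizes come from the
per-family encode helpers):

// Illustrative sizing helper only; command sizes are placeholders.
#include <cstddef>
#include <cstdint>

size_t estimateFrontEndSpace(bool frontEndDirtyAtEntry, uint32_t returnPointCount,
                             size_t feCmdSize, size_t bbStartSize) {
    size_t size = frontEndDirtyAtEntry ? feCmdSize : 0u;
    size += returnPointCount * (feCmdSize + bbStartSize);
    return size;
}

// Example: dirty front end at entry, 3 return points, 64-byte FE command,
// 12-byte BB_START -> 64 + 3 * (64 + 12) = 292 bytes reserved in the queue stream.
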
@@ -594,11 +606,11 @@ void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecut
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
auto &streamProperties = this->csr->getStreamProperties();
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (!isPatchingVfeStateAllowed) {
if (!isPatchingVfeStateAllowed && !this->multiReturnPointCommandList) {
streamProperties.frontEndState.setProperties(ctx.anyCommandListWithCooperativeKernels, ctx.anyCommandListRequiresDisabledEUFusion,
disableOverdispatch, isEngineInstanced, hwInfo);
} else {
streamProperties.frontEndState.setPropertySingleSliceDispatchCcsMode(isEngineInstanced, hwInfo);
ctx.engineInstanced = isEngineInstanced;
}
ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper());
ctx.frontEndStateDirty |= csr->getMediaVFEStateDirty();
@@ -836,11 +848,15 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &cmdStream, CommandListExecutionContext &ctx) {
auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
auto cmdBufferCount = cmdBufferAllocations.size();
bool isCommandListImmediate = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;
auto &returnPoints = commandList->getReturnPoints();
uint32_t returnPointsSize = commandList->getReturnPointsSize();
uint32_t cmdBufferProgress = 0;
uint32_t returnPointIdx = 0;
for (size_t iter = 0; iter < cmdBufferCount; iter++) {
auto allocation = cmdBufferAllocations[iter];
uint64_t startOffset = allocation->getGpuAddress();
@@ -848,6 +864,29 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
startOffset = ptrOffset(allocation->getGpuAddress(), commandList->commandContainer.currentLinearStreamStartOffset);
}
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&cmdStream, startOffset, true);
if (returnPointsSize > 0) {
bool cmdBufferHasRestarts = std::find_if(
std::next(returnPoints.begin(), cmdBufferProgress),
returnPoints.end(),
[allocation](CmdListReturnPoint &retPt) {
return retPt.currentCmdBuffer == allocation;
}) != returnPoints.end();
if (cmdBufferHasRestarts) {
while (returnPointIdx < returnPointsSize && allocation == returnPoints[returnPointIdx].currentCmdBuffer) {
auto scratchSpaceController = this->csr->getScratchSpaceController();
ctx.cmdListBeginState.frontEndState.setProperties(returnPoints[returnPointIdx].configSnapshot.frontEndState);
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
scratchSpaceController->getPerThreadScratchSpaceSize(),
cmdStream,
ctx.cmdListBeginState);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&cmdStream,
returnPoints[returnPointIdx].gpuAddress,
true);
returnPointIdx++;
}
cmdBufferProgress++;
}
}
}
}
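
The loop above pairs each return point with the command buffer it was recorded
in: after the batch buffer start into a buffer, the queue emits one front end
reconfiguration plus one batch buffer start per return point belonging to that
buffer. A minimal stand-alone sketch of that pairing with simplified types
(the real code matches on the GraphicsAllocation pointer and snapshots the
full front end state before each resume jump):

// Illustrative pairing sketch only; simplified types.
#include <cstddef>
#include <cstdint>
#include <vector>

struct ReturnPointSketch {
    uint64_t resumeGpuAddress; // address right after the injected BB_END
    size_t cmdBufferIndex;     // which command buffer the return point belongs to
};

void emitBatchBufferStarts(const std::vector<uint64_t> &cmdBufferGpuAddresses,
                           const std::vector<ReturnPointSketch> &returnPoints,
                           std::vector<uint64_t> &emittedJumpTargets) {
    size_t rpIdx = 0;
    for (size_t buffer = 0; buffer < cmdBufferGpuAddresses.size(); ++buffer) {
        emittedJumpTargets.push_back(cmdBufferGpuAddresses[buffer]); // BB_START into the buffer
        while (rpIdx < returnPoints.size() && returnPoints[rpIdx].cmdBufferIndex == buffer) {
            // the front end reprogramming for returnPoints[rpIdx] would be emitted here
            emittedJumpTargets.push_back(returnPoints[rpIdx].resumeGpuAddress); // resume after BB_END
            ++rpIdx;
        }
    }
}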


@@ -7,6 +7,8 @@
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "shared/source/os_interface/hw_info_config.h"
namespace L0 {
namespace ult {
@@ -69,5 +71,47 @@ void MultiTileCommandListFixtureInit::setUpParams(bool createImmediate, bool cre
event = std::unique_ptr<Event>(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
}
void MultiReturnCommandListFixture::setUp() {
DebugManager.flags.MultiReturnPointCommandList.set(1);
ModuleImmutableDataFixture::setUp();
auto revId = NEO::HwInfoConfig::get(device->getHwInfo().platform.eProductFamily)->getHwRevIdFromStepping(REVISION_B, device->getHwInfo());
neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = revId;
ze_result_t returnValue;
ze_command_queue_desc_t queueDesc{};
queueDesc.ordinal = 0u;
queueDesc.index = 0u;
queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;
commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&queueDesc,
false,
false,
returnValue));
NEO::EngineGroupType engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo());
commandList.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)));
mockKernelImmData = std::make_unique<MockImmutableData>(0u);
createModuleFromMockBinary(0u, false, mockKernelImmData.get());
kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
createKernel(kernel.get());
}
void MultiReturnCommandListFixture::tearDown() {
commandQueue->destroy();
commandList.reset(nullptr);
kernel.reset(nullptr);
mockKernelImmData.reset(nullptr);
ModuleImmutableDataFixture::tearDown();
}
} // namespace ult
} // namespace L0


@@ -14,7 +14,9 @@
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
namespace L0 {
namespace ult {
@@ -146,5 +148,17 @@ void validateTimestampRegisters(GenCmdList &cmdList,
startIt = itor;
}
struct MultiReturnCommandListFixture : public ModuleImmutableDataFixture {
void setUp();
void tearDown();
DebugManagerStateRestore restorer;
std::unique_ptr<MockImmutableData> mockKernelImmData;
std::unique_ptr<L0::ult::CommandList> commandList;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
L0::ult::CommandQueue *commandQueue;
};
} // namespace ult
} // namespace L0


@@ -60,6 +60,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::hostPtrMap;
using BaseClass::indirectAllocationsAllowed;
using BaseClass::initialize;
using BaseClass::multiReturnPointCommandList;
using BaseClass::partitionCount;
using BaseClass::patternAllocations;
using BaseClass::requiredStreamState;
@@ -120,6 +121,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::csr;
using BaseClass::finalStreamState;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::multiReturnPointCommandList;
using BaseClass::partitionCount;
using BaseClass::requiredStreamState;
@@ -141,6 +143,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::commandListPreemptionMode;
using BaseClass::csr;
using BaseClass::initialize;
using BaseClass::multiReturnPointCommandList;
using BaseClass::nonImmediateLogicalStateHelper;
using BaseClass::partitionCount;


@@ -34,6 +34,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::taskCount;
using CommandQueue::activeSubDevices;
using CommandQueue::internalUsage;
using CommandQueue::multiReturnPointCommandList;
using CommandQueue::partitionCount;
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
@@ -63,6 +64,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass::printfFunctionContainer;
using L0::CommandQueue::activeSubDevices;
using L0::CommandQueue::internalUsage;
using L0::CommandQueue::multiReturnPointCommandList;
using L0::CommandQueue::partitionCount;
using L0::CommandQueue::preemptionCmdSyncProgramming;
using L0::CommandQueueImp::csr;


@@ -1979,5 +1979,12 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore);
}
TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingMultiReturnPointFlagThenDefaultValuseIsFalse) {
ze_result_t returnValue;
std::unique_ptr<L0::ult::CommandList> commandList(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
ASSERT_NE(nullptr, commandList.get());
EXPECT_FALSE(commandList->multiReturnPointCommandList);
}
} // namespace ult
} // namespace L0


@@ -5,6 +5,9 @@
*
*/
#include "shared/source/os_interface/hw_info_config.h"
#include "shared/test/common/cmd_parse/hw_parse.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
#include "shared/test/common/mocks/ult_device_factory.h"
#include "shared/test/common/test_macros/hw_test.h"
@@ -553,5 +556,833 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
EXPECT_EQ(privateScratchPerThreadSize, ultCsr->requiredPrivateScratchSize);
}
using MultiReturnCommandListTest = Test<MultiReturnCommandListFixture>;
HWTEST2_F(MultiReturnCommandListTest, givenMultiReturnIsUsedWhenPropertyDisableEuFusionSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
EXPECT_TRUE(commandList->multiReturnPointCommandList);
auto &cmdStream = *commandList->commandContainer.getCommandStream();
auto &cmdBuffers = commandList->commandContainer.getCmdBufferAllocations();
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
size_t usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
size_t usedAfter = cmdStream.getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
if (fePropertiesSupport.disableEuFusion) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_NE(nullptr, bbEndCmd);
ASSERT_EQ(1u, commandList->getReturnPointsSize());
auto &returnPoint = commandList->getReturnPoints()[0];
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.disableEUFusion.isDirty);
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.disableEUFusion.value);
EXPECT_EQ(1u, cmdBuffers.size());
EXPECT_EQ(cmdBuffers[0], returnPoint.currentCmdBuffer);
} else {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_EQ(nullptr, bbEndCmd);
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
usedAfter = cmdStream.getUsed();
cmdList.clear();
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
if (fePropertiesSupport.disableEuFusion) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_EQ(nullptr, bbEndCmd);
EXPECT_EQ(1u, commandList->getReturnPointsSize());
} else {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_EQ(nullptr, bbEndCmd);
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0;
cmdStream.getSpace(cmdStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
auto oldCmdBuffer = cmdStream.getGraphicsAllocation();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
usedBefore = 0;
usedAfter = cmdStream.getUsed();
auto newCmdBuffer = cmdStream.getGraphicsAllocation();
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
cmdList.clear();
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
if (fePropertiesSupport.disableEuFusion) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_NE(nullptr, bbEndCmd);
ASSERT_EQ(2u, commandList->getReturnPointsSize());
auto &returnPoint = commandList->getReturnPoints()[1];
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.disableEUFusion.isDirty);
EXPECT_EQ(0, returnPoint.configSnapshot.frontEndState.disableEUFusion.value);
EXPECT_EQ(2u, cmdBuffers.size());
EXPECT_EQ(cmdBuffers[1], returnPoint.currentCmdBuffer);
}
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
cmdStream.getSpace(cmdStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
usedBefore = cmdStream.getUsed();
void *oldBase = cmdStream.getCpuBase();
oldCmdBuffer = cmdStream.getGraphicsAllocation();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
newCmdBuffer = cmdStream.getGraphicsAllocation();
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
cmdList.clear();
size_t parseSpace = sizeof(MI_BATCH_BUFFER_END);
if (fePropertiesSupport.disableEuFusion) {
parseSpace *= 2;
}
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(oldBase, usedBefore),
parseSpace));
if (fePropertiesSupport.disableEuFusion) {
ASSERT_EQ(2u, cmdList.size());
for (auto &cmd : cmdList) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
EXPECT_NE(nullptr, bbEndCmd);
}
ASSERT_EQ(3u, commandList->getReturnPointsSize());
auto &returnPoint = commandList->getReturnPoints()[2];
uint64_t expectedGpuAddress = oldCmdBuffer->getGpuAddress() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
EXPECT_EQ(oldCmdBuffer, returnPoint.currentCmdBuffer);
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.disableEUFusion.isDirty);
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.disableEUFusion.value);
EXPECT_EQ(3u, cmdBuffers.size());
EXPECT_EQ(cmdBuffers[1], returnPoint.currentCmdBuffer);
} else {
ASSERT_EQ(1u, cmdList.size());
for (auto &cmd : cmdList) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
EXPECT_NE(nullptr, bbEndCmd);
}
}
if (fePropertiesSupport.disableEuFusion) {
commandList->reset();
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
}
HWTEST2_F(MultiReturnCommandListTest, givenMultiReturnIsUsedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
EXPECT_TRUE(commandList->multiReturnPointCommandList);
NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
auto &cmdStream = *commandList->commandContainer.getCommandStream();
auto &cmdBuffers = commandList->commandContainer.getCmdBufferAllocations();
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
size_t usedBefore = cmdStream.getUsed();
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
size_t usedAfter = cmdStream.getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
if (fePropertiesSupport.computeDispatchAllWalker) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_NE(nullptr, bbEndCmd);
EXPECT_EQ(1u, commandList->getReturnPointsSize());
auto &returnPoint = commandList->getReturnPoints()[0];
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.isDirty);
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.value);
} else {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_EQ(nullptr, bbEndCmd);
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
usedBefore = cmdStream.getUsed();
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
usedAfter = cmdStream.getUsed();
cmdList.clear();
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
if (fePropertiesSupport.computeDispatchAllWalker) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_EQ(nullptr, bbEndCmd);
EXPECT_EQ(1u, commandList->getReturnPointsSize());
} else {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_EQ(nullptr, bbEndCmd);
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
auto oldCmdBuffer = cmdStream.getGraphicsAllocation();
void *oldBase = cmdStream.getCpuBase();
cmdStream.getSpace(cmdStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
usedBefore = cmdStream.getUsed();
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
auto newCmdBuffer = cmdStream.getGraphicsAllocation();
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
cmdList.clear();
size_t parseSpace = sizeof(MI_BATCH_BUFFER_END);
if (fePropertiesSupport.computeDispatchAllWalker) {
parseSpace *= 2;
}
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(oldBase, usedBefore),
parseSpace));
if (fePropertiesSupport.computeDispatchAllWalker) {
ASSERT_EQ(2u, cmdList.size());
for (auto &cmd : cmdList) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
EXPECT_NE(nullptr, bbEndCmd);
}
ASSERT_EQ(2u, commandList->getReturnPointsSize());
auto &returnPoint = commandList->getReturnPoints()[1];
uint64_t expectedGpuAddress = oldCmdBuffer->getGpuAddress() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
EXPECT_EQ(oldCmdBuffer, returnPoint.currentCmdBuffer);
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.isDirty);
EXPECT_EQ(0, returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(2u, cmdBuffers.size());
EXPECT_EQ(cmdBuffers[0], returnPoint.currentCmdBuffer);
} else {
ASSERT_EQ(1u, cmdList.size());
for (auto &cmd : cmdList) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
EXPECT_NE(nullptr, bbEndCmd);
}
}
cmdStream.getSpace(cmdStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
oldCmdBuffer = cmdStream.getGraphicsAllocation();
usedBefore = 0;
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
usedAfter = cmdStream.getUsed();
newCmdBuffer = cmdStream.getGraphicsAllocation();
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
cmdList.clear();
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
if (fePropertiesSupport.computeDispatchAllWalker) {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_NE(nullptr, bbEndCmd);
ASSERT_EQ(3u, commandList->getReturnPointsSize());
auto &returnPoint = commandList->getReturnPoints()[2];
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.isDirty);
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.value);
EXPECT_EQ(3u, cmdBuffers.size());
EXPECT_EQ(cmdBuffers[2], returnPoint.currentCmdBuffer);
} else {
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
EXPECT_EQ(nullptr, bbEndCmd);
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
if (fePropertiesSupport.computeDispatchAllWalker) {
commandList->reset();
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
}
HWTEST2_F(MultiReturnCommandListTest,
givenMultiReturnCmdListIsExecutedWhenPropertyDisableEuFusionSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) {
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
EXPECT_TRUE(commandList->multiReturnPointCommandList);
EXPECT_TRUE(commandQueue->multiReturnPointCommandList);
auto &cmdListStream = *commandList->commandContainer.getCommandStream();
auto &cmdListBuffers = commandList->commandContainer.getCmdBufferAllocations();
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0;
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.disableEuFusion) {
EXPECT_EQ(3u, commandList->getReturnPointsSize());
} else {
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
auto &returnPoints = commandList->getReturnPoints();
result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(3u, cmdListBuffers.size());
auto &cmdQueueStream = *commandQueue->commandStream;
size_t usedBefore = cmdQueueStream.getUsed();
auto cmdListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = cmdQueueStream.getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdQueueStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
auto nextIt = cmdList.begin();
if (fePropertiesSupport.disableEuFusion) {
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
EXPECT_EQ(4u, feCmdList.size());
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
EXPECT_EQ(6u, bbStartCmdList.size());
// initial FE -> requiresDisabledEUFusion = 0
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// initial jump to 1st cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = bbStartIt;
}
// reconfiguration FE -> requiresDisabledEUFusion = 1
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 1st cmd buffer after reconfiguration
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = returnPoints[0].gpuAddress;
EXPECT_EQ(cmdListBuffers[0], returnPoints[0].currentCmdBuffer);
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 2nd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = bbStartIt;
}
// reconfiguration FE -> requiresDisabledEUFusion = 0
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 2nd cmd buffer after 2nd reconfiguration
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = returnPoints[1].gpuAddress;
EXPECT_EQ(cmdListBuffers[1], returnPoints[1].currentCmdBuffer);
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = bbStartIt;
}
// reconfiguration FE -> requiresDisabledEUFusion = 1
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 2nd cmd buffer after 3rd reconfiguration
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = returnPoints[2].gpuAddress;
EXPECT_EQ(cmdListBuffers[1], returnPoints[2].currentCmdBuffer);
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 3rd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}
} else {
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
EXPECT_EQ(1u, feCmdList.size());
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
EXPECT_EQ(3u, bbStartCmdList.size());
// initial FE
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 1st cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 2nd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 3rd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}
}
}
HWTEST2_F(MultiReturnCommandListTest,
givenMultiReturnCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) {
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
EXPECT_TRUE(commandList->multiReturnPointCommandList);
EXPECT_TRUE(commandQueue->multiReturnPointCommandList);
auto &cmdListStream = *commandList->commandContainer.getCommandStream();
auto &cmdListBuffers = commandList->commandContainer.getCmdBufferAllocations();
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.computeDispatchAllWalker) {
EXPECT_EQ(3u, commandList->getReturnPointsSize());
} else {
EXPECT_EQ(0u, commandList->getReturnPointsSize());
}
auto &returnPoints = commandList->getReturnPoints();
result = commandList->close();
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_EQ(3u, cmdListBuffers.size());
auto &cmdQueueStream = *commandQueue->commandStream;
size_t usedBefore = cmdQueueStream.getUsed();
auto cmdListHandle = commandList->toHandle();
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
size_t usedAfter = cmdQueueStream.getUsed();
GenCmdList cmdList;
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
cmdList,
ptrOffset(cmdQueueStream.getCpuBase(), usedBefore),
(usedAfter - usedBefore)));
ASSERT_NE(0u, cmdList.size());
auto nextIt = cmdList.begin();
if (fePropertiesSupport.computeDispatchAllWalker) {
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
EXPECT_EQ(4u, feCmdList.size());
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
EXPECT_EQ(6u, bbStartCmdList.size());
// initial FE -> computeDispatchAllWalker = 0
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// initial jump to 1st cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = bbStartIt;
}
// reconfiguration FE -> computeDispatchAllWalker = 1
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 1st cmd buffer after reconfiguration
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = returnPoints[0].gpuAddress;
EXPECT_EQ(cmdListBuffers[0], returnPoints[0].currentCmdBuffer);
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = bbStartIt;
}
// reconfiguration FE -> computeDispatchAllWalker = 0
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 1st cmd buffer after 2nd reconfiguration
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = returnPoints[1].gpuAddress;
EXPECT_EQ(cmdListBuffers[0], returnPoints[1].currentCmdBuffer);
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 2nd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 3rd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = bbStartIt;
}
// reconfiguration FE -> computeDispatchAllWalker = 1
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 3rd cmd buffer after 3rd reconfiguration
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = returnPoints[2].gpuAddress;
EXPECT_EQ(cmdListBuffers[2], returnPoints[2].currentCmdBuffer);
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}
} else {
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
EXPECT_EQ(1u, feCmdList.size());
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
EXPECT_EQ(3u, bbStartCmdList.size());
// initial FE
{
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), feStateIt);
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
nextIt = feStateIt;
}
// jump to 1st cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 2nd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
nextIt = ++bbStartIt;
}
// jump to 3rd cmd buffer
{
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
ASSERT_NE(cmdList.end(), bbStartIt);
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}
}
}
} // namespace ult
} // namespace L0


@@ -1719,5 +1719,23 @@ TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToSynchronousWhenCommand
commandQueue->destroy();
}
TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingMultiReturnPointFlagThenDefaultValuseIsFalse) {
const ze_command_queue_desc_t desc{};
ze_result_t returnValue;
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&desc,
false,
false,
returnValue));
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
ASSERT_NE(nullptr, commandQueue);
EXPECT_FALSE(commandQueue->multiReturnPointCommandList);
commandQueue->destroy();
}
} // namespace ult
} // namespace L0