Mirror of https://github.com/intel/compute-runtime.git
Add multi return points command lists for front end reconfiguration
This change gives fine-grained control over the front end configuration used by each kernel: it makes it possible to inject FE commands in the command queue and then return to the exact place in the command list. Programming these commands in the queue removes the need to patch commands inside command lists, which is a costly operation, and it also allows context information to be programmed per command list.

Related-To: NEO-5019
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
committed by Compute-Runtime-Automation
parent 7d164ec118
commit b6e2d2df8b
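The idea behind the diff below can be summarized outside of the NEO sources. The following sketch is not part of the commit and uses invented stand-in types (FrontEndConfig, ReturnPoint, MultiReturnCommandList are not the driver's API): whenever an appended kernel needs a different front end configuration, the command list ends its current batch with MI_BATCH_BUFFER_END and records a return point; at execute time the command queue programs the new front end state itself and jumps back to the recorded GPU address, so nothing inside the closed command list has to be patched.

#include <cstdint>
#include <vector>

// Illustrative stand-ins for NEO::StreamProperties / CmdListReturnPoint.
struct FrontEndConfig {
    bool disableEuFusion = false;
    bool computeDispatchAllWalker = false;
};

struct ReturnPoint {
    FrontEndConfig configSnapshot; // front end state required from this point on
    uint64_t gpuAddress = 0;       // resume address right after the MI_BATCH_BUFFER_END
};

struct MultiReturnCommandList {
    std::vector<ReturnPoint> returnPoints;

    // Called when an appended kernel changes the front end configuration:
    // the real code emits MI_BATCH_BUFFER_END into the command stream and
    // then remembers where execution has to resume.
    void recordReturnPoint(const FrontEndConfig &newConfig, uint64_t resumeAddress) {
        returnPoints.push_back({newConfig, resumeAddress});
    }
};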
@@ -35,12 +35,18 @@ struct CmdListKernelLaunchParams {
bool isDestinationAllocationInSystemMemory = false;
};

struct CmdListReturnPoint {
NEO::StreamProperties configSnapshot;
uint64_t gpuAddress = 0;
NEO::GraphicsAllocation *currentCmdBuffer = nullptr;
};

struct CommandList : _ze_command_list_handle_t {
static constexpr uint32_t defaultNumIddsPerBlock = 64u;
static constexpr uint32_t commandListimmediateIddsPerBlock = 1u;

CommandList() = delete;
CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {}
CommandList(uint32_t numIddsPerBlock);

template <typename Type>
struct Allocator {
@@ -261,6 +267,14 @@ struct CommandList : _ze_command_list_handle_t {
return commandsToPatch;
}

std::vector<CmdListReturnPoint> &getReturnPoints() {
return returnPoints;
}

uint32_t getReturnPointsSize() const {
return static_cast<uint32_t>(returnPoints.size());
}

void makeResidentAndMigrate(bool);
void migrateSharedAllocations();

@@ -287,6 +301,7 @@ struct CommandList : _ze_command_list_handle_t {
std::map<const void *, NEO::GraphicsAllocation *> hostPtrMap;
std::vector<NEO::GraphicsAllocation *> ownedPrivateAllocations;
std::vector<NEO::GraphicsAllocation *> patternAllocations;
std::vector<CmdListReturnPoint> returnPoints;

NEO::StreamProperties requiredStreamState{};
NEO::StreamProperties finalStreamState{};
@@ -301,6 +316,7 @@ struct CommandList : _ze_command_list_handle_t {
bool containsCooperativeKernelsFlag = false;
bool containsStatelessUncachedResource = false;
bool performMemoryPrefetch = false;
bool multiReturnPointCommandList = false;
};

using CommandListAllocatorFn = CommandList *(*)(uint32_t);

@@ -112,6 +112,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
}
this->ownedPrivateAllocations.clear();
cmdListCurrentStartOffset = 0;
this->returnPoints.clear();
return ZE_RESULT_SUCCESS;
}

@@ -122,6 +123,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
this->commandListPreemptionMode = device->getDevicePreemptionMode();
this->engineGroupType = engineGroupType;
this->flags = flags;
if (this->multiReturnPointCommandList) {
this->returnPoints.reserve(32);
}

if (device->isImplicitScalingCapable() && !this->internalUsage && !isCopyOnly()) {
this->partitionCount = static_cast<uint32_t>(this->device->getNEODevice()->getDeviceBitfield().count());
@@ -2323,11 +2327,25 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
auto logicalStateHelperBlock = !getLogicalStateHelper();
if (finalStreamState.frontEndState.isDirty() && isPatchingVfeStateAllowed && logicalStateHelperBlock) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});

if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
if (isPatchingVfeStateAllowed) {
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
auto pVfeState = new VFE_STATE_TYPE;
NEO::PreambleHelper<GfxFamily>::programVfeState(pVfeState, hwInfo, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
commandsToPatch.push_back({pVfeStateAddress, pVfeState, CommandToPatch::FrontEndState});
}
if (this->multiReturnPointCommandList) {
auto &stream = *commandContainer.getCommandStream();
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferEnd(stream);

CmdListReturnPoint returnPoint = {
{},
stream.getGpuBase() + stream.getUsed(),
stream.getGraphicsAllocation()};
returnPoint.configSnapshot.frontEndState.setProperties(finalStreamState.frontEndState);
returnPoints.push_back(returnPoint);
}
}

finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);

@@ -25,6 +25,12 @@

namespace L0 {

CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
if (NEO::DebugManager.flags.MultiReturnPointCommandList.get() != -1) {
multiReturnPointCommandList = !!NEO::DebugManager.flags.MultiReturnPointCommandList.get();
}
}

CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};
CommandListAllocatorFn commandListFactoryImmediate[IGFX_MAX_PRODUCT] = {};

@@ -38,6 +38,11 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
if (overrideUseKmdWaitFunction != -1) {
useKmdWaitFunction = !!(overrideUseKmdWaitFunction);
}

int overrideMultiReturnPointCommandList = NEO::DebugManager.flags.MultiReturnPointCommandList.get();
if (overrideMultiReturnPointCommandList != -1) {
multiReturnPointCommandList = !!(overrideMultiReturnPointCommandList);
}
}

ze_result_t CommandQueueImp::destroy() {

@@ -60,6 +60,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
bool commandQueueDebugCmdsProgrammed = false;
bool isCopyOnlyCommandQueue = false;
bool internalUsage = false;
bool multiReturnPointCommandList = false;
};

using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

@@ -44,6 +44,7 @@
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/tools/source/metrics/metric.h"

#include <algorithm>
#include <limits>
#include <thread>

@@ -281,15 +282,21 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListFrontEndIfDirty(
auto &streamProperties = this->csr->getStreamProperties();
bool shouldProgramVfe = ctx.frontEndStateDirty;

if (isPatchingVfeStateAllowed) {
ctx.cmdListBeginState.frontEndState = {};

if (isPatchingVfeStateAllowed || this->multiReturnPointCommandList) {
auto &requiredStreamState = commandList->getRequiredStreamState();
streamProperties.frontEndState.setProperties(requiredStreamState.frontEndState);
streamProperties.frontEndState.setPropertySingleSliceDispatchCcsMode(ctx.engineInstanced, device->getHwInfo());

shouldProgramVfe |= streamProperties.frontEndState.isDirty();
}

ctx.cmdListBeginState.frontEndState.setProperties(streamProperties.frontEndState);

this->programFrontEndAndClearDirtyFlag(shouldProgramVfe, ctx, cmdStream);

if (isPatchingVfeStateAllowed) {
if (isPatchingVfeStateAllowed || this->multiReturnPointCommandList) {
auto &finalStreamState = commandList->getFinalStreamState();
streamProperties.frontEndState.setProperties(finalStreamState.frontEndState);
}
@@ -341,7 +348,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandL

auto singleFrontEndCmdSize = estimateFrontEndCmdSize();
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (!isPatchingVfeStateAllowed) {
if (!isPatchingVfeStateAllowed && !this->multiReturnPointCommandList) {
return isFrontEndStateDirty * singleFrontEndCmdSize;
}

@@ -352,11 +359,16 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandL
auto commandList = CommandList::fromHandle(phCommandLists[i]);
auto &requiredStreamState = commandList->getRequiredStreamState();
streamPropertiesCopy.frontEndState.setProperties(requiredStreamState.frontEndState);

streamPropertiesCopy.frontEndState.setPropertySingleSliceDispatchCcsMode(engineInstanced, device->getHwInfo());
if (isFrontEndStateDirty || streamPropertiesCopy.frontEndState.isDirty()) {
estimatedSize += singleFrontEndCmdSize;
isFrontEndStateDirty = false;
}
if (this->multiReturnPointCommandList) {
uint32_t frontEndChanges = commandList->getReturnPointsSize();
estimatedSize += (frontEndChanges * singleFrontEndCmdSize);
estimatedSize += (frontEndChanges * NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::getBatchBufferStartSize());
}
auto &finalStreamState = commandList->getFinalStreamState();
streamPropertiesCopy.frontEndState.setProperties(finalStreamState.frontEndState);
}

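For sizing, the extra queue-side cost added above is easy to reason about: every recorded return point costs one additional front end command plus one additional MI_BATCH_BUFFER_START in the queue's command buffer, on top of the usual per-list estimate. A hypothetical helper, not part of the diff, with the two sizes assumed to come from estimateFrontEndCmdSize and getBatchBufferStartSize:

#include <cstddef>
#include <cstdint>

// Rough model of the addition made in estimateFrontEndCmdSizeForMultipleCommandLists:
// each return point adds one extra front end command and one extra batch buffer start.
size_t extraSpacePerCommandList(uint32_t returnPointCount,
                                size_t singleFrontEndCmdSize,
                                size_t batchBufferStartSize) {
    return returnPointCount * (singleFrontEndCmdSize + batchBufferStartSize);
}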
@@ -594,11 +606,11 @@ void CommandQueueHw<gfxCoreFamily>::setFrontEndStateProperties(CommandListExecut
auto isEngineInstanced = csr->getOsContext().isEngineInstanced();
auto &streamProperties = this->csr->getStreamProperties();
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
if (!isPatchingVfeStateAllowed) {
if (!isPatchingVfeStateAllowed && !this->multiReturnPointCommandList) {
streamProperties.frontEndState.setProperties(ctx.anyCommandListWithCooperativeKernels, ctx.anyCommandListRequiresDisabledEUFusion,
disableOverdispatch, isEngineInstanced, hwInfo);
} else {
streamProperties.frontEndState.setPropertySingleSliceDispatchCcsMode(isEngineInstanced, hwInfo);
ctx.engineInstanced = isEngineInstanced;
}
ctx.frontEndStateDirty |= (streamProperties.frontEndState.isDirty() && !this->csr->getLogicalStateHelper());
ctx.frontEndStateDirty |= csr->getMediaVFEStateDirty();
@@ -836,11 +848,15 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis

template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandList *commandList, NEO::LinearStream &cmdStream, CommandListExecutionContext &ctx) {

auto &cmdBufferAllocations = commandList->commandContainer.getCmdBufferAllocations();
auto cmdBufferCount = cmdBufferAllocations.size();
bool isCommandListImmediate = (commandList->cmdListType == CommandList::CommandListType::TYPE_IMMEDIATE) ? true : false;

auto &returnPoints = commandList->getReturnPoints();
uint32_t returnPointsSize = commandList->getReturnPointsSize();
uint32_t cmdBufferProgress = 0;
uint32_t returnPointIdx = 0;

for (size_t iter = 0; iter < cmdBufferCount; iter++) {
auto allocation = cmdBufferAllocations[iter];
uint64_t startOffset = allocation->getGpuAddress();
@@ -848,6 +864,29 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListBatchBufferStart(CommandLis
startOffset = ptrOffset(allocation->getGpuAddress(), commandList->commandContainer.currentLinearStreamStartOffset);
}
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&cmdStream, startOffset, true);
if (returnPointsSize > 0) {
bool cmdBufferHasRestarts = std::find_if(
std::next(returnPoints.begin(), cmdBufferProgress),
returnPoints.end(),
[allocation](CmdListReturnPoint &retPt) {
return retPt.currentCmdBuffer == allocation;
}) != returnPoints.end();
if (cmdBufferHasRestarts) {
while (returnPointIdx < returnPointsSize && allocation == returnPoints[returnPointIdx].currentCmdBuffer) {
auto scratchSpaceController = this->csr->getScratchSpaceController();
ctx.cmdListBeginState.frontEndState.setProperties(returnPoints[returnPointIdx].configSnapshot.frontEndState);
programFrontEnd(scratchSpaceController->getScratchPatchAddress(),
scratchSpaceController->getPerThreadScratchSpaceSize(),
cmdStream,
ctx.cmdListBeginState);
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferStart(&cmdStream,
returnPoints[returnPointIdx].gpuAddress,
true);
returnPointIdx++;
}
cmdBufferProgress++;
}
}
}
}

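Putting the queue-side loop above into a self-contained form: the sketch below is not part of the diff and only mirrors the structure of programOneCmdListBatchBufferStart with invented helpers (emitBatchBufferStart and emitFrontEndState stand in for EncodeBatchBufferStartOrEnd and programFrontEnd, and return points are matched by a buffer index rather than by allocation pointer). For every command buffer of the list the queue jumps to its start; whenever that buffer contains return points, each one gets a front end reprogramming followed by a jump back to the resume address.

#include <cstddef>
#include <cstdint>
#include <vector>

struct ReturnPointSketch {
    uint64_t gpuAddress = 0;   // where to resume inside the command list
    size_t cmdBufferIndex = 0; // which command buffer the return point lives in
};

// Invented recorder standing in for the queue's LinearStream; it only keeps
// track of what would be emitted and in which order.
struct QueueStreamSketch {
    std::vector<uint64_t> batchBufferStarts;
    size_t frontEndReprograms = 0;
    void emitBatchBufferStart(uint64_t target) { batchBufferStarts.push_back(target); }
    void emitFrontEndState() { ++frontEndReprograms; }
};

void programCmdListJumps(QueueStreamSketch &stream,
                         const std::vector<uint64_t> &cmdBufferStartAddresses,
                         const std::vector<ReturnPointSketch> &returnPoints) {
    size_t rp = 0;
    for (size_t buf = 0; buf < cmdBufferStartAddresses.size(); ++buf) {
        stream.emitBatchBufferStart(cmdBufferStartAddresses[buf]); // jump into the list
        while (rp < returnPoints.size() && returnPoints[rp].cmdBufferIndex == buf) {
            stream.emitFrontEndState();                               // reprogram FE from the snapshot
            stream.emitBatchBufferStart(returnPoints[rp].gpuAddress); // resume right after the BB_END
            ++rp;
        }
    }
}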
@@ -7,6 +7,8 @@

#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"

#include "shared/source/os_interface/hw_info_config.h"

namespace L0 {
namespace ult {

@@ -69,5 +71,47 @@ void MultiTileCommandListFixtureInit::setUpParams(bool createImmediate, bool cre
event = std::unique_ptr<Event>(Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
}

void MultiReturnCommandListFixture::setUp() {
DebugManager.flags.MultiReturnPointCommandList.set(1);

ModuleImmutableDataFixture::setUp();

auto revId = NEO::HwInfoConfig::get(device->getHwInfo().platform.eProductFamily)->getHwRevIdFromStepping(REVISION_B, device->getHwInfo());
neoDevice->getRootDeviceEnvironment().getMutableHardwareInfo()->platform.usRevId = revId;

ze_result_t returnValue;

ze_command_queue_desc_t queueDesc{};
queueDesc.ordinal = 0u;
queueDesc.index = 0u;
queueDesc.priority = ZE_COMMAND_QUEUE_PRIORITY_NORMAL;

commandQueue = whiteboxCast(CommandQueue::create(productFamily,
device,
neoDevice->getDefaultEngine().commandStreamReceiver,
&queueDesc,
false,
false,
returnValue));

NEO::EngineGroupType engineGroupType = NEO::HwHelper::get(device->getHwInfo().platform.eRenderCoreFamily).getEngineGroupType(neoDevice->getDefaultEngine().getEngineType(), neoDevice->getDefaultEngine().getEngineUsage(), device->getHwInfo());

commandList.reset(whiteboxCast(CommandList::create(productFamily, device, engineGroupType, 0u, returnValue)));

mockKernelImmData = std::make_unique<MockImmutableData>(0u);
createModuleFromMockBinary(0u, false, mockKernelImmData.get());

kernel = std::make_unique<ModuleImmutableDataFixture::MockKernel>(module.get());
createKernel(kernel.get());
}

void MultiReturnCommandListFixture::tearDown() {
commandQueue->destroy();
commandList.reset(nullptr);
kernel.reset(nullptr);
mockKernelImmData.reset(nullptr);
ModuleImmutableDataFixture::tearDown();
}

} // namespace ult
} // namespace L0

@@ -14,7 +14,9 @@

#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"

namespace L0 {
namespace ult {
@@ -146,5 +148,17 @@ void validateTimestampRegisters(GenCmdList &cmdList,
startIt = itor;
}

struct MultiReturnCommandListFixture : public ModuleImmutableDataFixture {
void setUp();
void tearDown();

DebugManagerStateRestore restorer;

std::unique_ptr<MockImmutableData> mockKernelImmData;
std::unique_ptr<L0::ult::CommandList> commandList;
std::unique_ptr<ModuleImmutableDataFixture::MockKernel> kernel;
L0::ult::CommandQueue *commandQueue;
};

} // namespace ult
} // namespace L0

@@ -60,6 +60,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::hostPtrMap;
using BaseClass::indirectAllocationsAllowed;
using BaseClass::initialize;
using BaseClass::multiReturnPointCommandList;
using BaseClass::partitionCount;
using BaseClass::patternAllocations;
using BaseClass::requiredStreamState;
@@ -120,6 +121,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::csr;
using BaseClass::finalStreamState;
using BaseClass::isFlushTaskSubmissionEnabled;
using BaseClass::multiReturnPointCommandList;
using BaseClass::partitionCount;
using BaseClass::requiredStreamState;

@@ -141,6 +143,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::commandListPreemptionMode;
using BaseClass::csr;
using BaseClass::initialize;
using BaseClass::multiReturnPointCommandList;
using BaseClass::nonImmediateLogicalStateHelper;
using BaseClass::partitionCount;

@@ -34,6 +34,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using BaseClass::taskCount;
using CommandQueue::activeSubDevices;
using CommandQueue::internalUsage;
using CommandQueue::multiReturnPointCommandList;
using CommandQueue::partitionCount;

WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
@@ -63,6 +64,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using BaseClass::printfFunctionContainer;
using L0::CommandQueue::activeSubDevices;
using L0::CommandQueue::internalUsage;
using L0::CommandQueue::multiReturnPointCommandList;
using L0::CommandQueue::partitionCount;
using L0::CommandQueue::preemptionCmdSyncProgramming;
using L0::CommandQueueImp::csr;

@@ -1979,5 +1979,12 @@ HWTEST2_F(CommandListCreate, givenNullEventWhenAppendEventAfterWalkerThenNothing
EXPECT_EQ(commandList->commandContainer.getCommandStream()->getUsed(), usedBefore);
}

TEST_F(CommandListCreate, givenCreatedCommandListWhenGettingMultiReturnPointFlagThenDefaultValueIsFalse) {
ze_result_t returnValue;
std::unique_ptr<L0::ult::CommandList> commandList(whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)));
ASSERT_NE(nullptr, commandList.get());
EXPECT_FALSE(commandList->multiReturnPointCommandList);
}

} // namespace ult
} // namespace L0

@@ -5,6 +5,9 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/source/os_interface/hw_info_config.h"
|
||||
#include "shared/test/common/cmd_parse/hw_parse.h"
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||
#include "shared/test/common/mocks/ult_device_factory.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
@@ -553,5 +556,833 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
|
||||
EXPECT_EQ(privateScratchPerThreadSize, ultCsr->requiredPrivateScratchSize);
|
||||
}
|
||||
|
||||
using MultiReturnCommandListTest = Test<MultiReturnCommandListFixture>;
|
||||
|
||||
HWTEST2_F(MultiReturnCommandListTest, givenMultiReturnIsUsedWhenPropertyDisableEuFusionSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
|
||||
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
|
||||
|
||||
EXPECT_TRUE(commandList->multiReturnPointCommandList);
|
||||
|
||||
auto &cmdStream = *commandList->commandContainer.getCommandStream();
|
||||
auto &cmdBuffers = commandList->commandContainer.getCmdBufferAllocations();
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
|
||||
|
||||
size_t usedBefore = cmdStream.getUsed();
|
||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
size_t usedAfter = cmdStream.getUsed();
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
|
||||
ASSERT_EQ(1u, commandList->getReturnPointsSize());
|
||||
auto &returnPoint = commandList->getReturnPoints()[0];
|
||||
|
||||
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
|
||||
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
|
||||
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
|
||||
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.disableEUFusion.isDirty);
|
||||
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.disableEUFusion.value);
|
||||
|
||||
EXPECT_EQ(1u, cmdBuffers.size());
|
||||
EXPECT_EQ(cmdBuffers[0], returnPoint.currentCmdBuffer);
|
||||
} else {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_EQ(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
|
||||
usedBefore = cmdStream.getUsed();
|
||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
usedAfter = cmdStream.getUsed();
|
||||
|
||||
cmdList.clear();
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_EQ(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(1u, commandList->getReturnPointsSize());
|
||||
} else {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_EQ(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0;
|
||||
|
||||
cmdStream.getSpace(cmdStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
|
||||
auto oldCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
|
||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
usedBefore = 0;
|
||||
usedAfter = cmdStream.getUsed();
|
||||
|
||||
auto newCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
|
||||
|
||||
cmdList.clear();
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
|
||||
ASSERT_EQ(2u, commandList->getReturnPointsSize());
|
||||
auto &returnPoint = commandList->getReturnPoints()[1];
|
||||
|
||||
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
|
||||
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
|
||||
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
|
||||
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.disableEUFusion.isDirty);
|
||||
EXPECT_EQ(0, returnPoint.configSnapshot.frontEndState.disableEUFusion.value);
|
||||
|
||||
EXPECT_EQ(2u, cmdBuffers.size());
|
||||
EXPECT_EQ(cmdBuffers[1], returnPoint.currentCmdBuffer);
|
||||
}
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
|
||||
|
||||
cmdStream.getSpace(cmdStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
|
||||
|
||||
usedBefore = cmdStream.getUsed();
|
||||
void *oldBase = cmdStream.getCpuBase();
|
||||
oldCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
|
||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
newCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
|
||||
|
||||
cmdList.clear();
|
||||
|
||||
size_t parseSpace = sizeof(MI_BATCH_BUFFER_END);
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
parseSpace *= 2;
|
||||
}
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(oldBase, usedBefore),
|
||||
parseSpace));
|
||||
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
ASSERT_EQ(2u, cmdList.size());
|
||||
for (auto &cmd : cmdList) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
}
|
||||
ASSERT_EQ(3u, commandList->getReturnPointsSize());
|
||||
auto &returnPoint = commandList->getReturnPoints()[2];
|
||||
|
||||
uint64_t expectedGpuAddress = oldCmdBuffer->getGpuAddress() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
|
||||
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
|
||||
EXPECT_EQ(oldCmdBuffer, returnPoint.currentCmdBuffer);
|
||||
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.disableEUFusion.isDirty);
|
||||
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.disableEUFusion.value);
|
||||
|
||||
EXPECT_EQ(3u, cmdBuffers.size());
|
||||
EXPECT_EQ(cmdBuffers[1], returnPoint.currentCmdBuffer);
|
||||
} else {
|
||||
ASSERT_EQ(1u, cmdList.size());
|
||||
for (auto &cmd : cmdList) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
}
|
||||
}
|
||||
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
commandList->reset();
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiReturnCommandListTest, givenMultiReturnIsUsedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
|
||||
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
|
||||
|
||||
EXPECT_TRUE(commandList->multiReturnPointCommandList);
|
||||
|
||||
NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
|
||||
|
||||
auto &cmdStream = *commandList->commandContainer.getCommandStream();
|
||||
auto &cmdBuffers = commandList->commandContainer.getCmdBufferAllocations();
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
size_t usedBefore = cmdStream.getUsed();
|
||||
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||
size_t usedAfter = cmdStream.getUsed();
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(1u, commandList->getReturnPointsSize());
|
||||
auto &returnPoint = commandList->getReturnPoints()[0];
|
||||
|
||||
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
|
||||
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
|
||||
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
|
||||
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.isDirty);
|
||||
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
} else {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_EQ(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
|
||||
usedBefore = cmdStream.getUsed();
|
||||
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||
usedAfter = cmdStream.getUsed();
|
||||
|
||||
cmdList.clear();
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_EQ(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(1u, commandList->getReturnPointsSize());
|
||||
} else {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_EQ(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
|
||||
auto oldCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
void *oldBase = cmdStream.getCpuBase();
|
||||
cmdStream.getSpace(cmdStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
|
||||
usedBefore = cmdStream.getUsed();
|
||||
commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
|
||||
auto newCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
|
||||
|
||||
cmdList.clear();
|
||||
|
||||
size_t parseSpace = sizeof(MI_BATCH_BUFFER_END);
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
parseSpace *= 2;
|
||||
}
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(oldBase, usedBefore),
|
||||
parseSpace));
|
||||
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
ASSERT_EQ(2u, cmdList.size());
|
||||
for (auto &cmd : cmdList) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
}
|
||||
ASSERT_EQ(2u, commandList->getReturnPointsSize());
|
||||
auto &returnPoint = commandList->getReturnPoints()[1];
|
||||
|
||||
uint64_t expectedGpuAddress = oldCmdBuffer->getGpuAddress() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
|
||||
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
|
||||
EXPECT_EQ(oldCmdBuffer, returnPoint.currentCmdBuffer);
|
||||
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.isDirty);
|
||||
EXPECT_EQ(0, returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
|
||||
EXPECT_EQ(2u, cmdBuffers.size());
|
||||
EXPECT_EQ(cmdBuffers[0], returnPoint.currentCmdBuffer);
|
||||
} else {
|
||||
ASSERT_EQ(1u, cmdList.size());
|
||||
for (auto &cmd : cmdList) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(cmd);
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
}
|
||||
}
|
||||
|
||||
cmdStream.getSpace(cmdStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
|
||||
oldCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
|
||||
usedBefore = 0;
|
||||
commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||
usedAfter = cmdStream.getUsed();
|
||||
|
||||
newCmdBuffer = cmdStream.getGraphicsAllocation();
|
||||
ASSERT_NE(oldCmdBuffer, newCmdBuffer);
|
||||
|
||||
cmdList.clear();
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_NE(nullptr, bbEndCmd);
|
||||
|
||||
ASSERT_EQ(3u, commandList->getReturnPointsSize());
|
||||
auto &returnPoint = commandList->getReturnPoints()[2];
|
||||
|
||||
uint64_t expectedGpuAddress = cmdStream.getGpuBase() + usedBefore + sizeof(MI_BATCH_BUFFER_END);
|
||||
EXPECT_EQ(expectedGpuAddress, returnPoint.gpuAddress);
|
||||
EXPECT_EQ(cmdStream.getGraphicsAllocation(), returnPoint.currentCmdBuffer);
|
||||
EXPECT_TRUE(returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.isDirty);
|
||||
EXPECT_EQ(1, returnPoint.configSnapshot.frontEndState.computeDispatchAllWalkerEnable.value);
|
||||
|
||||
EXPECT_EQ(3u, cmdBuffers.size());
|
||||
EXPECT_EQ(cmdBuffers[2], returnPoint.currentCmdBuffer);
|
||||
} else {
|
||||
auto bbEndCmd = genCmdCast<MI_BATCH_BUFFER_END *>(*cmdList.begin());
|
||||
EXPECT_EQ(nullptr, bbEndCmd);
|
||||
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
commandList->reset();
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiReturnCommandListTest,
|
||||
givenMultiReturnCmdListIsExecutedWhenPropertyDisableEuFusionSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) {
|
||||
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
|
||||
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
|
||||
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
|
||||
|
||||
EXPECT_TRUE(commandList->multiReturnPointCommandList);
|
||||
EXPECT_TRUE(commandQueue->multiReturnPointCommandList);
|
||||
|
||||
auto &cmdListStream = *commandList->commandContainer.getCommandStream();
|
||||
auto &cmdListBuffers = commandList->commandContainer.getCmdBufferAllocations();
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
|
||||
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0;
|
||||
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
|
||||
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;
|
||||
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
|
||||
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
EXPECT_EQ(3u, commandList->getReturnPointsSize());
|
||||
} else {
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
|
||||
auto &returnPoints = commandList->getReturnPoints();
|
||||
|
||||
result = commandList->close();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(3u, cmdListBuffers.size());
|
||||
|
||||
auto &cmdQueueStream = *commandQueue->commandStream;
|
||||
size_t usedBefore = cmdQueueStream.getUsed();
|
||||
|
||||
auto cmdListHandle = commandList->toHandle();
|
||||
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
size_t usedAfter = cmdQueueStream.getUsed();
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdQueueStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
auto nextIt = cmdList.begin();
|
||||
|
||||
if (fePropertiesSupport.disableEuFusion) {
|
||||
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(4u, feCmdList.size());
|
||||
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(6u, bbStartCmdList.size());
|
||||
|
||||
// initial FE -> requiresDisabledEUFusion = 0
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// initial jump to 1st cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = bbStartIt;
|
||||
}
|
||||
// reconfiguration FE -> requiresDisabledEUFusion = 1
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 1st cmd buffer after reconfiguration
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = returnPoints[0].gpuAddress;
|
||||
EXPECT_EQ(cmdListBuffers[0], returnPoints[0].currentCmdBuffer);
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
|
||||
// jump to 2nd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = bbStartIt;
|
||||
}
|
||||
|
||||
// reconfiguration FE -> requiresDisabledEUFusion = 0
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 2nd cmd buffer after 2nd reconfiguration
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = returnPoints[1].gpuAddress;
|
||||
EXPECT_EQ(cmdListBuffers[1], returnPoints[1].currentCmdBuffer);
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = bbStartIt;
|
||||
}
|
||||
|
||||
// reconfiguration FE -> requiresDisabledEUFusion = 1
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 2nd cmd buffer after 3rd reconfiguration
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = returnPoints[2].gpuAddress;
|
||||
EXPECT_EQ(cmdListBuffers[1], returnPoints[2].currentCmdBuffer);
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
|
||||
// jump to 3rd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
}
|
||||
} else {
|
||||
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(1u, feCmdList.size());
|
||||
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(3u, bbStartCmdList.size());
|
||||
|
||||
// initial FE
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 1st cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
// jump to 2nd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
// jump to 3rd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
HWTEST2_F(MultiReturnCommandListTest,
|
||||
givenMultiReturnCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) {
|
||||
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
|
||||
|
||||
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
|
||||
NEO::HwInfoConfig::get(productFamily)->fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
|
||||
|
||||
NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
|
||||
|
||||
EXPECT_TRUE(commandList->multiReturnPointCommandList);
|
||||
EXPECT_TRUE(commandQueue->multiReturnPointCommandList);
|
||||
|
||||
auto &cmdListStream = *commandList->commandContainer.getCommandStream();
|
||||
auto &cmdListBuffers = commandList->commandContainer.getCmdBufferAllocations();
|
||||
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - 2 * sizeof(MI_BATCH_BUFFER_END));
|
||||
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
cmdListStream.getSpace(cmdListStream.getAvailableSpace() - sizeof(MI_BATCH_BUFFER_END));
|
||||
|
||||
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
EXPECT_EQ(3u, commandList->getReturnPointsSize());
|
||||
} else {
|
||||
EXPECT_EQ(0u, commandList->getReturnPointsSize());
|
||||
}
|
||||
|
||||
auto &returnPoints = commandList->getReturnPoints();
|
||||
|
||||
result = commandList->close();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(3u, cmdListBuffers.size());
|
||||
|
||||
auto &cmdQueueStream = *commandQueue->commandStream;
|
||||
size_t usedBefore = cmdQueueStream.getUsed();
|
||||
|
||||
auto cmdListHandle = commandList->toHandle();
|
||||
result = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
size_t usedAfter = cmdQueueStream.getUsed();
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
|
||||
cmdList,
|
||||
ptrOffset(cmdQueueStream.getCpuBase(), usedBefore),
|
||||
(usedAfter - usedBefore)));
|
||||
ASSERT_NE(0u, cmdList.size());
|
||||
auto nextIt = cmdList.begin();
|
||||
|
||||
if (fePropertiesSupport.computeDispatchAllWalker) {
|
||||
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(4u, feCmdList.size());
|
||||
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(6u, bbStartCmdList.size());
|
||||
|
||||
// initial FE -> computeDispatchAllWalker = 0
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// initial jump to 1st cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = bbStartIt;
|
||||
}
|
||||
|
||||
// reconfiguration FE -> computeDispatchAllWalker = 1
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 1st cmd buffer after reconfiguration
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = returnPoints[0].gpuAddress;
|
||||
EXPECT_EQ(cmdListBuffers[0], returnPoints[0].currentCmdBuffer);
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = bbStartIt;
|
||||
}
|
||||
|
||||
// reconfiguration FE -> computeDispatchAllWalker = 0
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 2nd cmd buffer after 2nd reconfiguration
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = returnPoints[1].gpuAddress;
|
||||
EXPECT_EQ(cmdListBuffers[0], returnPoints[1].currentCmdBuffer);
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
|
||||
// jump to 2nd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
|
||||
// jump to 3rd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = bbStartIt;
|
||||
}
|
||||
|
||||
// reconfiguration FE -> computeDispatchAllWalker = 1
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 3rd cmd buffer after 3rd reconfiguration
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = returnPoints[2].gpuAddress;
|
||||
EXPECT_EQ(cmdListBuffers[2], returnPoints[2].currentCmdBuffer);
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
}
|
||||
|
||||
} else {
|
||||
auto feCmdList = findAll<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(1u, feCmdList.size());
|
||||
auto bbStartCmdList = findAll<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
EXPECT_EQ(3u, bbStartCmdList.size());
|
||||
|
||||
// initial FE
|
||||
{
|
||||
auto feStateIt = find<VFE_STATE_TYPE *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), feStateIt);
|
||||
auto &feState = *genCmdCast<VFE_STATE_TYPE *>(*feStateIt);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(feState));
|
||||
|
||||
nextIt = feStateIt;
|
||||
}
|
||||
// jump to 1st cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[0]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
// jump to 2nd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[1]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
|
||||
nextIt = ++bbStartIt;
|
||||
}
|
||||
// jump to 3rd cmd buffer
|
||||
{
|
||||
auto bbStartIt = find<MI_BATCH_BUFFER_START *>(nextIt, cmdList.end());
|
||||
ASSERT_NE(cmdList.end(), bbStartIt);
|
||||
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*bbStartIt);
|
||||
|
||||
uint64_t bbStartGpuAddress = cmdListBuffers[2]->getGpuAddress();
|
||||
|
||||
EXPECT_EQ(bbStartGpuAddress, bbStart->getBatchBufferStartAddress());
|
||||
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -1719,5 +1719,23 @@ TEST_F(CommandQueueCreate, givenOverrideCmdQueueSyncModeToSynchronousWhenCommand
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
TEST_F(CommandQueueCreate, givenCreatedCommandQueueWhenGettingMultiReturnPointFlagThenDefaultValueIsFalse) {
|
||||
const ze_command_queue_desc_t desc{};
|
||||
ze_result_t returnValue;
|
||||
auto commandQueue = whiteboxCast(CommandQueue::create(productFamily,
|
||||
device,
|
||||
neoDevice->getDefaultEngine().commandStreamReceiver,
|
||||
&desc,
|
||||
false,
|
||||
false,
|
||||
returnValue));
|
||||
|
||||
EXPECT_EQ(returnValue, ZE_RESULT_SUCCESS);
|
||||
ASSERT_NE(nullptr, commandQueue);
|
||||
EXPECT_FALSE(commandQueue->multiReturnPointCommandList);
|
||||
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0