[feat, perf] add primary batch buffer support to front end properties update

For a command list dispatched as a primary batch buffer, the driver should not
use return points. Return points are useful when batch buffers are dispatched
as secondary; for primary buffers, patching the front end command is the more
desirable option.

Related-To: NEO-7807

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2023-04-11 19:41:11 +00:00
committed by Compute-Runtime-Automation
parent 62ea1b1a58
commit f12b11786e
6 changed files with 200 additions and 13 deletions

View File

@@ -2568,7 +2568,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
}
finalStreamState.frontEndState.setPropertiesComputeDispatchAllWalkerEnableDisableEuFusion(isCooperative, fusedEuDisabled);
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
bool isPatchingVfeStateAllowed = (NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get() || (this->frontEndStateTracking && this->dispatchCmdListBatchBufferAsPrimary));
if (logicalStateHelperBlock && finalStreamState.frontEndState.isDirty()) {
if (isPatchingVfeStateAllowed) {
auto frontEndStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), device->getHwInfo(), engineGroupType);
@@ -2576,7 +2576,7 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamPropertiesForRegularComma
NEO::PreambleHelper<GfxFamily>::programVfeState(frontEndStateCmd, rootDeviceEnvironment, 0, 0, device->getMaxNumHwThreads(), finalStreamState, nullptr);
commandsToPatch.push_back({frontEndStateAddress, frontEndStateCmd, CommandToPatch::FrontEndState});
}
if (this->frontEndStateTracking) {
if (this->frontEndStateTracking && !this->dispatchCmdListBatchBufferAsPrimary) {
auto &stream = *commandContainer.getCommandStream();
NEO::EncodeBatchBufferStartOrEnd<GfxFamily>::programBatchBufferEnd(stream);

View File

@@ -134,7 +134,8 @@ void ModuleMutableCommandListFixture::tearDown() {
ModuleImmutableDataFixture::tearDown();
}
void MultiReturnCommandListFixture::setUp() {
void FrontEndCommandListFixtureInit::setUp(int32_t dispatchCmdBufferPrimary) {
DebugManager.flags.DispatchCmdlistCmdBufferPrimary.set(dispatchCmdBufferPrimary);
DebugManager.flags.EnableFrontEndTracking.set(1);
DebugManager.flags.EnableFlushTaskSubmission.set(1);
ModuleMutableCommandListFixture::setUp(REVISION_B);

View File

@@ -86,8 +86,18 @@ struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
DebugManagerStateRestore restorer;
};
struct MultiReturnCommandListFixture : public ModuleMutableCommandListFixture {
void setUp();
// Non-template fixture base for front end state tracking tests.
// The parameterized setUp is only declared here; its definition lives in the
// fixture source file.
struct FrontEndCommandListFixtureInit : public ModuleMutableCommandListFixture {
    // Initializes the fixture with the requested value for the
    // "dispatch command list batch buffer as primary" mode.
    void setUp(int32_t dispatchCmdBufferPrimary);

    // Parameterless entry point: forwards to the parameterized overload with 0.
    void setUp() {
        this->setUp(0);
    }
};
template <int32_t dispatchCmdBufferPrimary>
struct FrontEndCommandListFixture : public FrontEndCommandListFixtureInit {
void setUp() {
FrontEndCommandListFixtureInit::setUp(dispatchCmdBufferPrimary);
}
};
struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixture {

View File

@@ -205,6 +205,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::cmdQImmediate;
using BaseClass::commandContainer;
using BaseClass::commandListPreemptionMode;
using BaseClass::commandsToPatch;
using BaseClass::csr;
using BaseClass::currentBindingTablePoolBaseAddress;
using BaseClass::currentDynamicStateBaseAddress;

View File

@@ -19,7 +19,7 @@
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
@@ -1518,5 +1518,180 @@ HWTEST2_F(CommandListCreate, givenGetAlignedAllocationWhenExternalMemWithinDiffe
commandList->hostPtrMap.clear();
}
// Fixture instantiated with template argument 1; FrontEndCommandListFixtureInit::setUp
// writes that value to the DispatchCmdlistCmdBufferPrimary debug flag before the
// base fixture setUp runs.
using FrontEndPrimaryBatchBufferCommandListTest = Test<FrontEndCommandListFixture<1>>;

// Appends a series of kernel launches while toggling the kernel's
// requiresDisabledEUFusion attribute and checks the command list patch list
// (commandsToPatch) after every launch:
//  - when the platform reports disableEuFusion support, each change of the
//    attribute is expected to add exactly one FrontEndState entry whose
//    destination is the command stream position recorded just before the launch
//    and whose command is a CFE_STATE carrying the matching fusion setting;
//  - a launch that does not change the attribute is expected to add nothing;
//  - when the platform does not report support, the patch list is expected to
//    stay empty after every launch.
HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
          givenFrontEndTrackingIsUsedWhenPropertyDisableEuFusionSupportedThenExpectFrontEndAddedToPatchlist,
          IsAtLeastXeHpCore) {
    using CFE_STATE = typename FamilyType::CFE_STATE;

    // Query which front end properties this product supports; all expectations
    // below branch on fePropertiesSupport.disableEuFusion.
    NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
    auto &productHelper = device->getProductHelper();
    productHelper.fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());

    EXPECT_TRUE(commandList->frontEndStateTracking);

    auto &cmdStream = *commandList->getCmdContainer().getCommandStream();

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};

    // Launch 1: kernel attribute left at the fixture's initial value - no patch entry expected.
    commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    auto &commandsToPatch = commandList->commandsToPatch;
    EXPECT_EQ(0u, commandsToPatch.size());

    // Launch 2: attribute switched on - first patch entry expected.
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

    size_t usedBefore = cmdStream.getUsed();
    commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    if (fePropertiesSupport.disableEuFusion) {
        ASSERT_EQ(1u, commandsToPatch.size());
        CommandList::CommandToPatch &cfePatch = commandsToPatch[0];
        EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type);

        void *expectedDestination = ptrOffset(cmdStream.getCpuBase(), usedBefore);
        EXPECT_EQ(expectedDestination, cfePatch.pDestination);

        auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
        ASSERT_NE(nullptr, cfeCmd);
        EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(*cfeCmd));
    } else {
        EXPECT_EQ(0u, commandsToPatch.size());
    }

    // Launch 3: attribute unchanged - patch list size must not change.
    commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    if (fePropertiesSupport.disableEuFusion) {
        EXPECT_EQ(1u, commandsToPatch.size());
    } else {
        EXPECT_EQ(0u, commandsToPatch.size());
    }

    // Launch 4: attribute switched off - second patch entry expected.
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 0;

    usedBefore = cmdStream.getUsed();
    commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    if (fePropertiesSupport.disableEuFusion) {
        ASSERT_EQ(2u, commandsToPatch.size());
        CommandList::CommandToPatch &cfePatch = commandsToPatch[1];
        EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type);

        void *expectedDestination = ptrOffset(cmdStream.getCpuBase(), usedBefore);
        EXPECT_EQ(expectedDestination, cfePatch.pDestination);

        auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
        ASSERT_NE(nullptr, cfeCmd);
        EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(*cfeCmd));
    } else {
        // Keep the unsupported path covered at this step as well, matching
        // every other step of this test.
        EXPECT_EQ(0u, commandsToPatch.size());
    }

    // Launch 5: attribute switched on again - third patch entry expected.
    mockKernelImmData->kernelDescriptor->kernelAttributes.flags.requiresDisabledEUFusion = 1;

    usedBefore = cmdStream.getUsed();
    commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);

    if (fePropertiesSupport.disableEuFusion) {
        ASSERT_EQ(3u, commandsToPatch.size());
        CommandList::CommandToPatch &cfePatch = commandsToPatch[2];
        EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type);

        void *expectedDestination = ptrOffset(cmdStream.getCpuBase(), usedBefore);
        EXPECT_EQ(expectedDestination, cfePatch.pDestination);

        auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
        ASSERT_NE(nullptr, cfeCmd);
        EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getDisableFusionStateFromFrontEndCommand(*cfeCmd));
    } else {
        EXPECT_EQ(0u, commandsToPatch.size());
    }

    // Resetting the command list is expected to drop all recorded patch entries.
    if (fePropertiesSupport.disableEuFusion) {
        commandList->reset();
        EXPECT_EQ(0u, commandsToPatch.size());
    }
}
// Alternates regular and cooperative kernel launches on one command list and
// checks the patch list (commandsToPatch) after every launch. When the product
// reports computeDispatchAllWalker support, each switch between regular and
// cooperative launch is expected to add one FrontEndState entry whose CFE_STATE
// carries the matching computeDispatchAllWalker setting; otherwise the patch
// list is expected to stay empty throughout.
HWTEST2_F(FrontEndPrimaryBatchBufferCommandListTest,
givenFrontEndTrackingCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndAddedToPatchlist,
IsAtLeastXeHpCore) {
using CFE_STATE = typename FamilyType::CFE_STATE;
// Query product support; every expectation below branches on
// fePropertiesSupport.computeDispatchAllWalker.
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
auto &productHelper = device->getProductHelper();
productHelper.fillFrontEndPropertiesSupportStructure(fePropertiesSupport, device->getHwInfo());
// NOTE(review): presumably needed so regular and cooperative launches can be
// appended to the same command list - confirm against the flag's definition.
NEO::DebugManager.flags.AllowMixingRegularAndCooperativeKernels.set(1);
EXPECT_TRUE(commandList->frontEndStateTracking);
EXPECT_TRUE(commandQueue->frontEndStateTracking);
ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
// Launch 1: regular kernel - no patch entry expected.
ze_result_t result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
auto &commandsToPatch = commandList->commandsToPatch;
EXPECT_EQ(0u, commandsToPatch.size());
// Launch 2: first cooperative kernel - first patch entry expected, with
// computeDispatchAllWalker enabled in the recorded command.
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.computeDispatchAllWalker) {
ASSERT_EQ(1u, commandsToPatch.size());
CommandList::CommandToPatch &cfePatch = commandsToPatch[0];
EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type);
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd));
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}
// Launch 3: second cooperative kernel in a row - patch list size must not change.
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.computeDispatchAllWalker) {
EXPECT_EQ(1u, commandsToPatch.size());
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}
// Launch 4: back to a regular kernel - second patch entry expected, with
// computeDispatchAllWalker disabled in the recorded command.
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.computeDispatchAllWalker) {
ASSERT_EQ(2u, commandsToPatch.size());
CommandList::CommandToPatch &cfePatch = commandsToPatch[1];
EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type);
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd));
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}
// Launch 5: cooperative kernel again - third patch entry expected, with
// computeDispatchAllWalker enabled.
result = commandList->appendLaunchCooperativeKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
if (fePropertiesSupport.computeDispatchAllWalker) {
ASSERT_EQ(3u, commandsToPatch.size());
CommandList::CommandToPatch &cfePatch = commandsToPatch[2];
EXPECT_EQ(CommandList::CommandToPatch::FrontEndState, cfePatch.type);
auto cfeCmd = genCmdCast<CFE_STATE *>(cfePatch.pCommand);
ASSERT_NE(nullptr, cfeCmd);
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getComputeDispatchAllWalkerFromFrontEndCommand(*cfeCmd));
} else {
EXPECT_EQ(0u, commandsToPatch.size());
}
// Resetting the command list is expected to drop all recorded patch entries.
if (fePropertiesSupport.computeDispatchAllWalker) {
commandList->reset();
EXPECT_EQ(0u, commandsToPatch.size());
}
}
} // namespace ult
} // namespace L0

View File

@@ -803,9 +803,9 @@ HWTEST2_F(CmdlistAppendLaunchKernelTests,
commandList->cmdQImmediate = nullptr;
}
using MultiReturnCommandListTest = Test<MultiReturnCommandListFixture>;
using FrontEndMultiReturnCommandListTest = Test<FrontEndCommandListFixture<0>>;
HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDisableEuFusionSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDisableEuFusionSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
auto &productHelper = device->getProductHelper();
@@ -972,7 +972,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyDis
}
}
HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
HWTEST2_F(FrontEndMultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectReturnPointsAndBbEndProgramming, IsAtLeastSkl) {
using MI_BATCH_BUFFER_END = typename FamilyType::MI_BATCH_BUFFER_END;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
auto &productHelper = device->getProductHelper();
@@ -1134,7 +1134,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenFrontEndTrackingIsUsedWhenPropertyCom
}
}
HWTEST2_F(MultiReturnCommandListTest,
HWTEST2_F(FrontEndMultiReturnCommandListTest,
givenFrontEndTrackingCmdListIsExecutedWhenPropertyDisableEuFusionSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) {
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
@@ -1384,7 +1384,7 @@ HWTEST2_F(MultiReturnCommandListTest,
}
}
HWTEST2_F(MultiReturnCommandListTest,
HWTEST2_F(FrontEndMultiReturnCommandListTest,
givenFrontEndTrackingCmdListIsExecutedWhenPropertyComputeDispatchAllWalkerSupportedThenExpectFrontEndProgrammingInCmdQueue, IsAtLeastSkl) {
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
@@ -1635,7 +1635,7 @@ HWTEST2_F(MultiReturnCommandListTest,
}
}
HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothRegularFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
HWTEST2_F(FrontEndMultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothRegularFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
auto &productHelper = device->getProductHelper();
@@ -1758,7 +1758,7 @@ HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsr
EXPECT_EQ(0u, feStateCmds.size());
}
HWTEST2_F(MultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothImmediateFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
HWTEST2_F(FrontEndMultiReturnCommandListTest, givenCmdQueueAndImmediateCmdListUseSameCsrWhenAppendingKernelOnBothImmediateFirstThenFrontEndStateIsNotChanged, IsAtLeastSkl) {
using VFE_STATE_TYPE = typename FamilyType::VFE_STATE_TYPE;
NEO::FrontEndPropertiesSupport fePropertiesSupport = {};
auto &productHelper = device->getProductHelper();