Use primary buffer start when the immediate command list uses flush task

Related-To: NEO-7091

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2022-06-28 18:17:24 +00:00
committed by Compute-Runtime-Automation
parent fffd56d7a6
commit 3ed8b4319f
8 changed files with 244 additions and 82 deletions

View File

@ -350,7 +350,7 @@ void CommandListCoreFamily<gfxCoreFamily>::appendMultiTileBarrier(NEO::Device &n
0,
0,
!(cmdListType == CommandListType::TYPE_IMMEDIATE),
true);
!this->isFlushTaskSubmissionEnabled);
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@ -15,6 +15,8 @@ set(L0_FIXTURES_SOURCES
${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/module_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/memory_ipc_fixture.h
${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_fixture.cpp
${CMAKE_CURRENT_SOURCE_DIR}/multi_tile_fixture.h
)
add_library(${TARGET_NAME} OBJECT ${L0_FIXTURES_SOURCES} ${NEO_CORE_tests_compiler_mocks})

View File

@ -0,0 +1,71 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
#include "level_zero/core/source/context/context_imp.h"
namespace L0 {
namespace ult {
// Configures one root device with four sub-devices and implicit scaling enabled,
// runs the base module fixture setup, builds module/kernel 0, and then creates
// the context and the RenderCompute command list used by the tests.
void MultiTileCommandListAppendLaunchFunctionFixture::SetUp() {
    DebugManager.flags.EnableImplicitScaling.set(1);

    // Device counts are assigned ahead of the base fixture SetUp call.
    MultiDeviceFixture::numRootDevices = 1u;
    MultiDeviceFixture::numSubDevices = 4u;
    MultiDeviceModuleFixture::SetUp();

    createModuleFromBinary(0u);
    createKernel(0u);

    device = driverHandle->devices[0];

    // Null-initialized: the result check below is non-fatal, so on a failed
    // createContext the handle would otherwise be read while indeterminate.
    ze_context_handle_t hContext = nullptr;
    ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
    ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));

    ze_result_t returnValue;
    commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
// Releases the command list and context created in SetUp, then tears down the
// base module fixture.
void MultiTileCommandListAppendLaunchFunctionFixture::TearDown() {
    // SetUp reports creation failures with non-fatal EXPECT checks, so either
    // pointer may still be null here; skip destruction instead of dereferencing it.
    if (commandList != nullptr) {
        commandList->destroy();
        commandList = nullptr;
    }
    if (contextImp != nullptr) {
        contextImp->destroy();
        contextImp = nullptr;
    }
    MultiDeviceModuleFixture::TearDown();
}
// Configures one root device with two sub-devices and implicit scaling enabled,
// runs the base module fixture setup, builds module/kernel 0, and creates the
// context. No command list is created here; tests build their own.
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::SetUp() {
    DebugManager.flags.EnableImplicitScaling.set(1);

    // Device counts are assigned ahead of the base fixture SetUp call.
    MultiDeviceFixture::numRootDevices = 1u;
    MultiDeviceFixture::numSubDevices = 2u;
    MultiDeviceModuleFixture::SetUp();

    createModuleFromBinary(0u);
    createKernel(0u);

    device = driverHandle->devices[0];

    // Null-initialized: the result check below is non-fatal, so on a failed
    // createContext the handle would otherwise be read while indeterminate.
    ze_context_handle_t hContext = nullptr;
    ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
    ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext);
    EXPECT_EQ(ZE_RESULT_SUCCESS, res);
    contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
}
// Releases the context created in SetUp, then tears down the base module fixture.
void MultiTileImmediateCommandListAppendLaunchFunctionFixture::TearDown() {
    // contextImp stays null when SetUp never obtained a context; guard the call
    // so teardown does not dereference a null pointer.
    if (contextImp != nullptr) {
        contextImp->destroy();
        contextImp = nullptr;
    }
    MultiDeviceModuleFixture::TearDown();
}
} // namespace ult
} // namespace L0

View File

@ -0,0 +1,42 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#pragma once
#include "shared/test/common/helpers/variable_backup.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
struct ContextImp;
struct Device;
namespace ult {
// Test fixture built on MultiDeviceModuleFixture that additionally holds a
// context, a white-box command list and the device under test.
// SetUp/TearDown are only declared here; their definitions live out of line.
struct MultiTileCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
void SetUp();
void TearDown();
// Objects populated by SetUp; all default to null until then.
ContextImp *contextImp = nullptr;
WhiteBox<::L0::CommandList> *commandList = nullptr;
L0::Device *device = nullptr;
// NOTE(review): presumably sets NEO::ImplicitScaling::apiSupport to true for the
// fixture's lifetime and restores the prior value on destruction — confirm
// against VariableBackup.
VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
};
// Test fixture built on MultiDeviceModuleFixture for immediate command list
// tests. It holds only a context and the device; no command list member is
// declared, so tests are expected to create their own.
// SetUp/TearDown are only declared here; their definitions live out of line.
struct MultiTileImmediateCommandListAppendLaunchFunctionFixture : public MultiDeviceModuleFixture {
void SetUp();
void TearDown();
// Objects populated by SetUp; both default to null until then.
ContextImp *contextImp = nullptr;
L0::Device *device = nullptr;
// NOTE(review): presumably these set the two globals to true for the fixture's
// lifetime and restore the prior values on destruction — confirm against
// VariableBackup.
VariableBackup<bool> backupApiSupport{&NEO::ImplicitScaling::apiSupport, true};
VariableBackup<bool> backupLocalMemory{&NEO::OSInterface::osEnableLocalMemory, true};
};
} // namespace ult
} // namespace L0

View File

@ -84,7 +84,7 @@ HWTEST_F(CommandListAppendBarrier, GivenEventVsNoEventWhenAppendingBarrierThenCo
template <typename FamilyType>
void validateMultiTileBarrier(void *cmdBuffer, size_t &parsedOffset,
uint64_t gpuFinalSyncAddress, uint64_t gpuCrossTileSyncAddress, uint64_t gpuStartAddress,
bool validateCleanupSection) {
bool validateCleanupSection, bool secondaryBatchBuffer) {
using PIPE_CONTROL = typename FamilyType::PIPE_CONTROL;
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
@ -130,7 +130,11 @@ void validateMultiTileBarrier(void *cmdBuffer, size_t &parsedOffset,
auto bbStart = genCmdCast<MI_BATCH_BUFFER_START *>(ptrOffset(cmdBuffer, parsedOffset));
ASSERT_NE(nullptr, bbStart);
EXPECT_EQ(gpuStartAddress, bbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
if (secondaryBatchBuffer) {
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
} else {
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, bbStart->getSecondLevelBatchBuffer());
}
parsedOffset += sizeof(MI_BATCH_BUFFER_START);
}
{
@ -236,7 +240,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier, WhenAppendingBarrierThenPipeControl
void *cmdBuffer = ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceBefore);
size_t parsedOffset = 0;
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
EXPECT_EQ(expectedUseBuffer, parsedOffset);
}
@ -298,7 +302,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
void *cmdBuffer = cmdListStream->getCpuBase();
size_t parsedOffset = 0;
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
EXPECT_EQ(expectedUseBuffer, parsedOffset);
}
@ -363,7 +367,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
void *cmdBuffer = ptrOffset(cmdListStream->getCpuBase(), useSizeBefore);
size_t parsedOffset = 0;
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
EXPECT_EQ(multiTileBarrierSize, parsedOffset);
cmdBuffer = ptrOffset(cmdBuffer, parsedOffset);
@ -485,7 +489,7 @@ HWTEST2_F(MultiTileCommandListAppendBarrier,
cmdBuffer = ptrOffset(cmdBuffer, timestampRegisters);
size_t parsedOffset = 0;
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true);
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, gpuFinalSyncAddress, gpuCrossTileSyncAddress, gpuStartAddress, true, true);
EXPECT_EQ(multiTileBarrierSize, parsedOffset);
cmdBuffer = ptrOffset(cmdBuffer, (parsedOffset + postBarrierSynchronization));
@ -517,6 +521,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
auto immediateCommandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
ASSERT_NE(nullptr, immediateCommandList);
immediateCommandList->cmdListType = ::L0::CommandList::CommandListType::TYPE_IMMEDIATE;
immediateCommandList->isFlushTaskSubmissionEnabled = true;
ze_result_t returnValue = immediateCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
EXPECT_EQ(2u, immediateCommandList->partitionCount);
@ -586,7 +591,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
ASSERT_NE(cmdList.end(), itorBbStart);
auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
EXPECT_EQ(bbStartGpuAddress, cmdBbStart->getBatchBufferStartAddress());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
auto atomicCounter = reinterpret_cast<uint32_t *>(ptrOffset(cmdBbStart, sizeof(MI_BATCH_BUFFER_START)));
EXPECT_EQ(0u, *atomicCounter);
@ -599,9 +604,42 @@ HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
void *cmdBuffer = ptrOffset(cmdStream->getCpuBase(), usedBeforeSize);
size_t parsedOffset = 0;
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, 0, crossTileSyncGpuAddress, bbStartGpuAddress, false);
validateMultiTileBarrier<FamilyType>(cmdBuffer, parsedOffset, 0, crossTileSyncGpuAddress, bbStartGpuAddress, false, false);
EXPECT_EQ(expectedSize, parsedOffset);
}
// With flush-task submission disabled on an immediate multi-tile command list,
// the barrier's MI_BATCH_BUFFER_START must be programmed as a second-level batch.
HWTEST2_F(MultiTileImmediateCommandListAppendBarrier,
          givenMultiTileImmediateCommandListNotUsingFlushTaskWhenAppendingBarrierThenExpectSecondaryBufferStart, IsWithinXeGfxFamily) {
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    auto immediateCommandList = std::make_unique<::L0::ult::CommandListCoreFamily<gfxCoreFamily>>();
    ASSERT_NE(nullptr, immediateCommandList);
    immediateCommandList->cmdListType = ::L0::CommandList::CommandListType::TYPE_IMMEDIATE;
    // The flag is set before initialize(), mirroring the flush-task variant of this test.
    immediateCommandList->isFlushTaskSubmissionEnabled = false;
    ze_result_t returnValue = immediateCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    EXPECT_EQ(2u, immediateCommandList->partitionCount);

    auto cmdStream = immediateCommandList->commandContainer.getCommandStream();

    size_t usedBeforeSize = cmdStream->getUsed();
    returnValue = immediateCommandList->appendBarrier(nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    size_t usedAfterSize = cmdStream->getUsed();

    // Parse only the commands emitted by appendBarrier.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdStream->getCpuBase(), usedBeforeSize),
        (usedAfterSize - usedBeforeSize)));

    auto itorBbStart = find<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorBbStart);
    auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
}
} // namespace ult
} // namespace L0

View File

@ -17,6 +17,7 @@
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
@ -1275,44 +1276,7 @@ HWTEST2_F(CommandListAppendLaunchKernel, GivenDebugToggleSetWhenUpdateStreamProp
EXPECT_EQ(defaultThreadArbitrationPolicy, pCommandList->finalStreamState.stateComputeMode.threadArbitrationPolicy.value);
}
struct MultiTileCommandListAppendLaunchFunctionXeHpCoreFixture : public MultiDeviceModuleFixture {
void SetUp() {
DebugManager.flags.EnableImplicitScaling.set(1);
MultiDeviceFixture::numRootDevices = 1u;
MultiDeviceFixture::numSubDevices = 4u;
MultiDeviceModuleFixture::SetUp();
createModuleFromBinary(0u);
createKernel(0u);
device = driverHandle->devices[0];
ze_context_handle_t hContext;
ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
ze_result_t returnValue;
commandList = whiteboxCast(CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
}
void TearDown() {
commandList->destroy();
contextImp->destroy();
MultiDeviceModuleFixture::TearDown();
}
ContextImp *contextImp = nullptr;
WhiteBox<::L0::CommandList> *commandList = nullptr;
L0::Device *device = nullptr;
VariableBackup<bool> backup{&NEO::ImplicitScaling::apiSupport, true};
};
using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileCommandListAppendLaunchFunctionXeHpCoreFixture>;
using MultiTileCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileCommandListAppendLaunchFunctionFixture>;
HWCMDTEST_F(IGFX_XE_HP_CORE, MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingEnabledWhenAppendingKernelWithEventThenAllEventPacketsAreUsed) {
ze_event_pool_desc_t eventPoolDesc = {};
@ -1378,5 +1342,37 @@ HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest, givenCooperative
EXPECT_TRUE(cmd->getWorkloadPartitionEnable());
}
// A regular multi-tile command list must emit a partitioned walker and program
// its MI_BATCH_BUFFER_START as a second-level batch when a kernel is appended.
HWTEST2_F(MultiTileCommandListAppendLaunchFunctionXeHpCoreTest,
          givenRegularCommandListWhenSynchronizationRequiredThenExpectJumpingBbStartCommandToSecondary, IsAtLeastXeHpCore) {
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(1);

    ze_group_count_t groupCount{128, 1, 1};

    auto cmdStream = commandList->commandContainer.getCommandStream();

    auto sizeBefore = cmdStream->getUsed();
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandList->appendLaunchKernel(kernel.get(), &groupCount, nullptr, 0, nullptr, launchParams);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto sizeAfter = cmdStream->getUsed();

    // Parse only the commands emitted by appendLaunchKernel.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdStream->getCpuBase(), sizeBefore),
        sizeAfter - sizeBefore));

    auto itorWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
    // Abort before dereferencing if no walker was emitted; the iterator would be end().
    ASSERT_NE(cmdList.end(), itorWalker);
    auto cmd = genCmdCast<WALKER_TYPE *>(*itorWalker);
    EXPECT_TRUE(cmd->getWorkloadPartitionEnable());

    auto itorBbStart = find<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorBbStart);
    auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
}
} // namespace ult
} // namespace L0

View File

@ -18,6 +18,7 @@
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
#include "level_zero/core/source/event/event.h"
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
@ -694,39 +695,7 @@ HWTEST_F(CommandListAppendLaunchKernelWithImplicitArgs, givenIndirectDispatchWit
context->freeMem(alloc);
}
struct MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreFixture : public MultiDeviceModuleFixture {
void SetUp() {
DebugManager.flags.EnableImplicitScaling.set(1);
MultiDeviceFixture::numRootDevices = 1u;
MultiDeviceFixture::numSubDevices = 2u;
MultiDeviceModuleFixture::SetUp();
createModuleFromBinary(0u);
createKernel(0u);
device = driverHandle->devices[0];
ze_context_handle_t hContext;
ze_context_desc_t desc = {ZE_STRUCTURE_TYPE_CONTEXT_DESC, nullptr, 0};
ze_result_t res = device->getDriverHandle()->createContext(&desc, 0u, nullptr, &hContext);
EXPECT_EQ(ZE_RESULT_SUCCESS, res);
contextImp = static_cast<ContextImp *>(Context::fromHandle(hContext));
}
void TearDown() {
contextImp->destroy();
MultiDeviceModuleFixture::TearDown();
}
ContextImp *contextImp = nullptr;
L0::Device *device = nullptr;
VariableBackup<bool> backupApiSupport{&NEO::ImplicitScaling::apiSupport, true};
VariableBackup<bool> backupLocalMemory{&NEO::OSInterface::osEnableLocalMemory, true};
};
using MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreFixture>;
using MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest = Test<MultiTileImmediateCommandListAppendLaunchFunctionFixture>;
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListThenDoNotAddSelfCleanup, IsAtLeastXeHpCore) {
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
@ -742,6 +711,7 @@ HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenIm
auto immediateCmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
immediateCmdList->cmdListType = ::L0::CommandList::CommandListType::TYPE_IMMEDIATE;
immediateCmdList->isFlushTaskSubmissionEnabled = true;
auto result = immediateCmdList->initialize(device, NEO::EngineGroupType::Compute, 0u);
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
@ -778,9 +748,10 @@ HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenIm
EXPECT_EQ(itorPipeControl, itorStoreDataImm);
auto itorBbStart = find<MI_BATCH_BUFFER_START *>(itorPipeControl, cmdList.end());
ASSERT_NE(cmdList.end(), itorBbStart);
auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
EXPECT_EQ(bbStartGpuAddress, cmdBbStart->getBatchBufferStartAddress());
ASSERT_NE(cmdList.end(), itorBbStart);
EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_FIRST_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
auto itorMiAtomic = find<MI_ATOMIC *>(itorBbStart, cmdList.end());
EXPECT_EQ(cmdList.end(), itorMiAtomic);
@ -789,5 +760,47 @@ HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenIm
EXPECT_EQ(cmdList.end(), itorSemaphoreWait);
}
// With flush-task submission disabled, an immediate multi-tile command list must
// still emit a partitioned walker and program its MI_BATCH_BUFFER_START as a
// second-level batch.
HWTEST2_F(MultiTileImmediateCommandListAppendLaunchFunctionXeHpCoreTest, givenImplicitScalingWhenUsingImmediateCommandListWithoutFlushTaskThenUseSecondaryBuffer, IsAtLeastXeHpCore) {
    // Only the command types this test actually inspects are aliased.
    using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;

    DebugManager.flags.UsePipeControlAfterPartitionedWalker.set(1);

    ze_group_count_t groupCount{128, 1, 1};

    auto immediateCmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    immediateCmdList->cmdListType = ::L0::CommandList::CommandListType::TYPE_IMMEDIATE;
    // The flag is set before initialize(), mirroring the flush-task variant of this test.
    immediateCmdList->isFlushTaskSubmissionEnabled = false;
    auto result = immediateCmdList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto cmdStream = immediateCmdList->commandContainer.getCommandStream();

    auto sizeBefore = cmdStream->getUsed();
    CmdListKernelLaunchParams launchParams = {};
    result = immediateCmdList->appendLaunchKernelWithParams(kernel.get(), &groupCount, nullptr, launchParams);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);
    auto sizeAfter = cmdStream->getUsed();

    // Parse only the commands emitted by the kernel append.
    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList,
        ptrOffset(cmdStream->getCpuBase(), sizeBefore),
        sizeAfter - sizeBefore));

    auto itorWalker = find<WALKER_TYPE *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorWalker);
    auto cmdWalker = genCmdCast<WALKER_TYPE *>(*itorWalker);
    EXPECT_TRUE(cmdWalker->getWorkloadPartitionEnable());

    auto itorBbStart = find<MI_BATCH_BUFFER_START *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itorBbStart);
    auto cmdBbStart = genCmdCast<MI_BATCH_BUFFER_START *>(*itorBbStart);
    EXPECT_EQ(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH, cmdBbStart->getSecondLevelBatchBuffer());
}
} // namespace ult
} // namespace L0

View File

@ -280,7 +280,7 @@ void EncodeDispatchKernel<Family>::encode(CommandContainer &container,
walkerCmd,
args.device->getDeviceBitfield(),
args.partitionCount,
true,
!container.getFlushTaskUsedForImmediate(),
!args.isKernelDispatchedFromImmediateCmdList,
false,
workPartitionAllocationGpuVa,