mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-04 15:53:45 +08:00
Move thread arbitration programming to command list
Thread arbitration policy can be programmed in the command list instead of in executeCommandLists(), so move it there. Related-to: NEO-5187 Change-Id: I518e19e939e0548d9d454738c6deb5e095154422 Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
committed by
sys_ocldev
parent
504dfe8a50
commit
b62a121f6f
@@ -204,6 +204,7 @@ struct CommandListCoreFamily : CommandListImp {
|
||||
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
|
||||
bool useMemCopyToBlitFill(size_t patternSize);
|
||||
void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
|
||||
// Programs the thread arbitration policy into this command list's command
// stream: the hardware helper's default for the device, or the value of the
// OverrideThreadArbitrationPolicy debug flag when that flag is not -1.
void programThreadArbitrationPolicy(Device *device);
|
||||
|
||||
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
|
||||
virtual AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize);
|
||||
|
||||
@@ -14,6 +14,7 @@
|
||||
#include "shared/source/helpers/heap_helper.h"
|
||||
#include "shared/source/helpers/hw_helper.h"
|
||||
#include "shared/source/helpers/hw_info.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/helpers/string.h"
|
||||
#include "shared/source/helpers/surface_format_info.h"
|
||||
@@ -50,6 +51,32 @@ inline ze_result_t parseErrorCode(NEO::ErrorCode returnValue) {
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandListCoreFamily<gfxCoreFamily>::programThreadArbitrationPolicy(Device *device) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
auto &hwHelper = NEO::HwHelper::get(device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
uint32_t threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
|
||||
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
|
||||
threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
|
||||
}
|
||||
NEO::PreambleHelper<GfxFamily>::programThreadArbitration(commandContainer.getCommandStream(), threadArbitrationPolicy);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
printfFunctionContainer.clear();
|
||||
removeDeallocationContainerData();
|
||||
removeHostPtrAllocations();
|
||||
commandContainer.reset();
|
||||
|
||||
if (!isCopyOnly()) {
|
||||
programStateBaseAddress(commandContainer, true);
|
||||
programThreadArbitrationPolicy(device);
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO::EngineGroupType engineGroupType) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
@@ -63,6 +90,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
if (returnType == ZE_RESULT_SUCCESS) {
|
||||
if (!isCopyOnly()) {
|
||||
programStateBaseAddress(commandContainer, false);
|
||||
programThreadArbitrationPolicy(device);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1591,20 +1619,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reserveSpace(size_t size, void
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
// Pre-change definition of reset() as removed by this commit's hunk.
// Clears the printf function container, drops deallocation-container data and
// host-pointer allocations, resets the command container and, unless the list
// is copy-only, programs the state base address again. Always returns
// ZE_RESULT_SUCCESS. This version does not program thread arbitration.
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
|
||||
printfFunctionContainer.clear();
|
||||
removeDeallocationContainerData();
|
||||
removeHostPtrAllocations();
|
||||
commandContainer.reset();
|
||||
|
||||
if (!isCopyOnly()) {
|
||||
programStateBaseAddress(commandContainer, true);
|
||||
}
|
||||
|
||||
return ZE_RESULT_SUCCESS;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) {
|
||||
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
|
||||
|
||||
@@ -81,7 +81,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
size_t spaceForResidency = 0;
|
||||
size_t preemptionSize = 0u;
|
||||
size_t debuggerCmdsSize = 0;
|
||||
size_t threadArbitrationCmdSize = 0;
|
||||
constexpr size_t residencyContainerSpaceForPreemption = 2;
|
||||
constexpr size_t residencyContainerSpaceForFence = 1;
|
||||
constexpr size_t residencyContainerSpaceForTagWrite = 1;
|
||||
@@ -97,8 +96,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
statePreemption = devicePreemption;
|
||||
}
|
||||
|
||||
threadArbitrationCmdSize = NEO::PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize();
|
||||
|
||||
if (!commandQueueDebugCmdsProgrammed) {
|
||||
debuggerCmdsSize += NEO::PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(neoDevice->isDebuggerActive());
|
||||
}
|
||||
@@ -188,7 +185,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
|
||||
}
|
||||
|
||||
linearStreamSizeEstimate += threadArbitrationCmdSize + preemptionSize + debuggerCmdsSize;
|
||||
linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize;
|
||||
}
|
||||
|
||||
linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
|
||||
@@ -237,14 +234,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
|
||||
statePreemption = commandQueuePreemptionMode;
|
||||
}
|
||||
|
||||
auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
|
||||
uint32_t threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
|
||||
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
|
||||
threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
|
||||
}
|
||||
|
||||
NEO::PreambleHelper<GfxFamily>::programThreadArbitration(&child, threadArbitrationPolicy);
|
||||
|
||||
const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
|
||||
neoDevice->isDebuggerActive();
|
||||
if (devicePreemption == NEO::PreemptionMode::MidThread) {
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
*/
|
||||
|
||||
#include "shared/source/command_container/command_encoder.h"
|
||||
#include "shared/source/gen9/reg_configs.h"
|
||||
#include "shared/source/helpers/preamble.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
|
||||
@@ -544,6 +545,127 @@ HWTEST_F(CommandListAppendLaunchKernel, WhenAddingKernelsThenResidencyContainerD
|
||||
}
|
||||
}
|
||||
|
||||
using CommandListArbitrationPolicyTest = Test<ModuleFixture>;
|
||||
|
||||
// Checks that a newly created render/compute command list programs the
// round-robin thread arbitration policy: every MI_LOAD_REGISTER_IMM in its
// command stream that targets DebugControlReg2 must carry the round-robin
// register data.
HWTEST_F(CommandListArbitrationPolicyTest, whenCreatingCommandListThenDefaultThreadArbitrationPolicyIsUsed) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<CommandList>(whitebox_cast(L0::CommandList::create(productFamily,
                                                                                         device,
                                                                                         NEO::EngineGroupType::RenderCompute,
                                                                                         returnValue)));
    // Fatal assertions: both pointers are dereferenced below, so continuing
    // after a failure here would crash instead of reporting it.
    ASSERT_NE(nullptr, commandList);
    ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream());

    GenCmdList parsedCommandList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommandList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
    // At most two MI_LOAD_REGISTER_IMM commands are expected after creation.
    EXPECT_GE(2u, miLoadImm.size());

    // NOTE(review): nothing is checked when no command targets DebugControlReg2;
    // presumably intended for families that program arbitration differently - confirm.
    for (auto it : miLoadImm) {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
        if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
            EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin),
                      cmd->getDataDword());
        }
    }
}
|
||||
|
||||
// Checks that the OverrideThreadArbitrationPolicy debug flag is honoured at
// command list creation: with the flag set to 0, every MI_LOAD_REGISTER_IMM
// that targets DebugControlReg2 must carry the age-based register data.
HWTEST_F(CommandListArbitrationPolicyTest, whenCreatingCommandListThenChosenThreadArbitrationPolicyIsUsed) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    // Restores the debug flags when the test scope ends.
    DebugManagerStateRestore restorer;
    DebugManager.flags.OverrideThreadArbitrationPolicy.set(0);

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<CommandList>(whitebox_cast(L0::CommandList::create(productFamily,
                                                                                         device,
                                                                                         NEO::EngineGroupType::RenderCompute,
                                                                                         returnValue)));
    // Fatal assertions: both pointers are dereferenced below, so continuing
    // after a failure here would crash instead of reporting it.
    ASSERT_NE(nullptr, commandList);
    ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream());

    GenCmdList parsedCommandList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommandList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
        commandList->commandContainer.getCommandStream()->getUsed()));

    auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
    // At most two MI_LOAD_REGISTER_IMM commands are expected after creation.
    EXPECT_GE(2u, miLoadImm.size());

    // NOTE(review): nothing is checked when no command targets DebugControlReg2;
    // presumably intended for families that program arbitration differently - confirm.
    for (auto it : miLoadImm) {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
        if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
            EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::AgeBased),
                      cmd->getDataDword());
        }
    }
}
|
||||
|
||||
// Checks that reset() programs the same thread arbitration policy the command
// list had right after creation.
//
// The DebugControlReg2 value is captured from the command stream before and
// after reset(); each capture starts at a uint64_t-max sentinel that means
// "no write to DebugControlReg2 was found". The two captures are compared once
// both are complete, so a register write that appears on only one side of the
// reset is reported as a failure.
HWTEST_F(CommandListArbitrationPolicyTest, whenCommandListIsResetThenOriginalThreadArbitrationPolicyIsKept) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<CommandList>(whitebox_cast(L0::CommandList::create(productFamily,
                                                                                         device,
                                                                                         NEO::EngineGroupType::RenderCompute,
                                                                                         returnValue)));
    // Fatal assertions: both pointers are dereferenced below, so continuing
    // after a failure here would crash instead of reporting it.
    ASSERT_NE(nullptr, commandList);
    ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream());

    uint64_t originalThreadArbitrationPolicy = std::numeric_limits<uint64_t>::max();
    {
        GenCmdList parsedCommandList;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
            parsedCommandList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
            commandList->commandContainer.getCommandStream()->getUsed()));

        auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
        // At most two MI_LOAD_REGISTER_IMM commands are expected after creation.
        EXPECT_GE(2u, miLoadImm.size());

        for (auto it : miLoadImm) {
            auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
            if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
                EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin),
                          cmd->getDataDword());
                originalThreadArbitrationPolicy = cmd->getDataDword();
            }
        }
    }

    commandList->reset();

    uint64_t newThreadArbitrationPolicy = std::numeric_limits<uint64_t>::max();
    {
        GenCmdList parsedCommandList;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
            parsedCommandList, ptrOffset(commandList->commandContainer.getCommandStream()->getCpuBase(), 0),
            commandList->commandContainer.getCommandStream()->getUsed()));

        auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
        // At most two MI_LOAD_REGISTER_IMM commands are expected after reset.
        EXPECT_GE(2u, miLoadImm.size());

        for (auto it : miLoadImm) {
            auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
            if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
                EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin),
                          cmd->getDataDword());
                newThreadArbitrationPolicy = cmd->getDataDword();
            }
        }
    }

    // Compared after both scans so that a policy write that went missing after
    // reset() (new value still at the sentinel) is detected too.
    EXPECT_EQ(originalThreadArbitrationPolicy, newThreadArbitrationPolicy);
}
|
||||
|
||||
HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsAddsSemaphoreToCommandStream) {
|
||||
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
|
||||
Mock<::L0::Kernel> kernel;
|
||||
|
||||
Reference in New Issue
Block a user