Move thread arbitration programming to command list

Thread arbitration policy can be programmed in the command list
instead of doing it in executeCommandList(), so move it there.

Related-to: NEO-5187

Change-Id: I518e19e939e0548d9d454738c6deb5e095154422
Signed-off-by: Jaime Arteaga <jaime.a.arteaga.molina@intel.com>
This commit is contained in:
Jaime Arteaga
2020-10-15 23:11:07 -07:00
committed by sys_ocldev
parent 504dfe8a50
commit b62a121f6f
4 changed files with 152 additions and 26 deletions

View File

@@ -204,6 +204,7 @@ struct CommandListCoreFamily : CommandListImp {
void appendSignalEventPostWalker(ze_event_handle_t hEvent);
bool useMemCopyToBlitFill(size_t patternSize);
void programStateBaseAddress(NEO::CommandContainer &container, bool genericMediaStateClearRequired);
void programThreadArbitrationPolicy(Device *device);
uint64_t getInputBufferSize(NEO::ImageType imageType, uint64_t bytesPerPixel, const ze_image_region_t *region);
virtual AlignedAllocationData getAlignedAllocation(Device *device, const void *buffer, uint64_t bufferSize);

View File

@@ -14,6 +14,7 @@
#include "shared/source/helpers/heap_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/hw_info.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/surface_format_info.h"
@@ -50,6 +51,32 @@ inline ze_result_t parseErrorCode(NEO::ErrorCode returnValue) {
return ZE_RESULT_SUCCESS;
}
// Programs the thread arbitration policy into this command list's command stream.
// The policy is the hardware helper's default for the device's render core family,
// unless the OverrideThreadArbitrationPolicy debug flag holds a value other than -1,
// in which case the flag value is used instead.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandListCoreFamily<gfxCoreFamily>::programThreadArbitrationPolicy(Device *device) {
    using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

    const auto renderCoreFamily = device->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily;
    auto &hwHelper = NEO::HwHelper::get(renderCoreFamily);
    const uint32_t defaultPolicy = hwHelper.getDefaultThreadArbitrationPolicy();

    // -1 means "no override requested".
    const auto overridePolicy = NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get();
    const uint32_t selectedPolicy = (overridePolicy == -1) ? defaultPolicy
                                                           : static_cast<uint32_t>(overridePolicy);

    NEO::PreambleHelper<GfxFamily>::programThreadArbitration(commandContainer.getCommandStream(), selectedPolicy);
}
// Returns the command list to a reusable state: clears the printf function container,
// releases deallocation-container data and host-pointer allocations, and resets the
// command container. For lists that are not copy-only, the state base address and the
// thread arbitration policy are programmed again into the reset container.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
    printfFunctionContainer.clear();
    removeDeallocationContainerData();
    removeHostPtrAllocations();
    commandContainer.reset();

    // Copy-only lists skip the state base address / arbitration programming.
    if (isCopyOnly()) {
        return ZE_RESULT_SUCCESS;
    }

    programStateBaseAddress(commandContainer, true);
    programThreadArbitrationPolicy(device);
    return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO::EngineGroupType engineGroupType) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;
@@ -63,6 +90,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
if (returnType == ZE_RESULT_SUCCESS) {
if (!isCopyOnly()) {
programStateBaseAddress(commandContainer, false);
programThreadArbitrationPolicy(device);
}
}
@@ -1591,20 +1619,6 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::reserveSpace(size_t size, void
return ZE_RESULT_SUCCESS;
}
// Returns the command list to a reusable state: clears the printf function container,
// releases deallocation-container data and host-pointer allocations, and resets the
// command container. For lists that are not copy-only, the state base address is
// programmed again into the reset container.
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::reset() {
    printfFunctionContainer.clear();
    removeDeallocationContainerData();
    removeHostPtrAllocations();
    commandContainer.reset();

    const bool copyOnly = isCopyOnly();
    if (copyOnly) {
        return ZE_RESULT_SUCCESS;
    }

    programStateBaseAddress(commandContainer, true);
    return ZE_RESULT_SUCCESS;
}
template <GFXCORE_FAMILY gfxCoreFamily>
ze_result_t CommandListCoreFamily<gfxCoreFamily>::prepareIndirectParams(const ze_group_count_t *pThreadGroupDimensions) {
using GfxFamily = typename NEO::GfxFamilyMapper<gfxCoreFamily>::GfxFamily;

View File

@@ -81,7 +81,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
size_t spaceForResidency = 0;
size_t preemptionSize = 0u;
size_t debuggerCmdsSize = 0;
size_t threadArbitrationCmdSize = 0;
constexpr size_t residencyContainerSpaceForPreemption = 2;
constexpr size_t residencyContainerSpaceForFence = 1;
constexpr size_t residencyContainerSpaceForTagWrite = 1;
@@ -97,8 +96,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
statePreemption = devicePreemption;
}
threadArbitrationCmdSize = NEO::PreambleHelper<GfxFamily>::getThreadArbitrationCommandsSize();
if (!commandQueueDebugCmdsProgrammed) {
debuggerCmdsSize += NEO::PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(neoDevice->isDebuggerActive());
}
@@ -188,7 +185,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
linearStreamSizeEstimate += estimateStateBaseAddressCmdSize();
}
linearStreamSizeEstimate += threadArbitrationCmdSize + preemptionSize + debuggerCmdsSize;
linearStreamSizeEstimate += preemptionSize + debuggerCmdsSize;
}
linearStreamSizeEstimate += isCopyOnlyCommandQueue ? NEO::EncodeMiFlushDW<GfxFamily>::getMiFlushDwCmdSizeForDataWrite() : NEO::MemorySynchronizationCommands<GfxFamily>::getSizeForPipeControlWithPostSyncOperation(device->getHwInfo());
@@ -237,14 +234,6 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
statePreemption = commandQueuePreemptionMode;
}
auto &hwHelper = NEO::HwHelper::get(neoDevice->getHardwareInfo().platform.eRenderCoreFamily);
uint32_t threadArbitrationPolicy = hwHelper.getDefaultThreadArbitrationPolicy();
if (NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get() != -1) {
threadArbitrationPolicy = static_cast<uint32_t>(NEO::DebugManager.flags.OverrideThreadArbitrationPolicy.get());
}
NEO::PreambleHelper<GfxFamily>::programThreadArbitration(&child, threadArbitrationPolicy);
const bool sipKernelUsed = devicePreemption == NEO::PreemptionMode::MidThread ||
neoDevice->isDebuggerActive();
if (devicePreemption == NEO::PreemptionMode::MidThread) {

View File

@@ -6,6 +6,7 @@
*/
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/gen9/reg_configs.h"
#include "shared/source/helpers/preamble.h"
#include "shared/source/helpers/register_offsets.h"
#include "shared/test/unit_test/cmd_parse/gen_cmd_parse.h"
@@ -544,6 +545,127 @@ HWTEST_F(CommandListAppendLaunchKernel, WhenAddingKernelsThenResidencyContainerD
}
}
// Fixture alias for the thread arbitration policy tests below; it is Test<ModuleFixture> under a dedicated name.
using CommandListArbitrationPolicyTest = Test<ModuleFixture>;
// Creates a render/compute command list and checks that every MI_LOAD_REGISTER_IMM
// targeting DebugControlReg2 in its command stream carries the round-robin policy value.
// Streams that contain no such write pass trivially.
HWTEST_F(CommandListArbitrationPolicyTest, whenCreatingCommandListThenDefaultThreadArbitrationPolicyIsUsed) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<CommandList>(whitebox_cast(L0::CommandList::create(productFamily,
                                                                                          device,
                                                                                          NEO::EngineGroupType::RenderCompute,
                                                                                          returnValue)));
    // Fatal assertions: everything below dereferences these pointers, so a
    // non-fatal EXPECT would let the test crash on a failed creation.
    ASSERT_NE(nullptr, commandList);
    ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream());

    auto *commandStream = commandList->commandContainer.getCommandStream();

    GenCmdList parsedCommandList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommandList, ptrOffset(commandStream->getCpuBase(), 0),
        commandStream->getUsed()));

    auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
    // At most two MI_LOAD_REGISTER_IMM commands are expected in a fresh list.
    EXPECT_GE(2u, miLoadImm.size());

    for (auto it : miLoadImm) {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
        if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
            EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin),
                      cmd->getDataDword());
        }
    }
}
// Sets the OverrideThreadArbitrationPolicy debug flag to 0 before creating a
// render/compute command list, then checks that every MI_LOAD_REGISTER_IMM targeting
// DebugControlReg2 carries the age-based policy value instead of the default.
// Streams that contain no such write pass trivially.
HWTEST_F(CommandListArbitrationPolicyTest, whenCreatingCommandListThenChosenThreadArbitrationPolicyIsUsed) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    // Restores debug flags when the test scope ends.
    DebugManagerStateRestore restorer;
    DebugManager.flags.OverrideThreadArbitrationPolicy.set(0);

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<CommandList>(whitebox_cast(L0::CommandList::create(productFamily,
                                                                                          device,
                                                                                          NEO::EngineGroupType::RenderCompute,
                                                                                          returnValue)));
    // Fatal assertions: everything below dereferences these pointers, so a
    // non-fatal EXPECT would let the test crash on a failed creation.
    ASSERT_NE(nullptr, commandList);
    ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream());

    auto *commandStream = commandList->commandContainer.getCommandStream();

    GenCmdList parsedCommandList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        parsedCommandList, ptrOffset(commandStream->getCpuBase(), 0),
        commandStream->getUsed()));

    auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
    // At most two MI_LOAD_REGISTER_IMM commands are expected in a fresh list.
    EXPECT_GE(2u, miLoadImm.size());

    for (auto it : miLoadImm) {
        auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
        if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
            EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::AgeBased),
                      cmd->getDataDword());
        }
    }
}
// Creates a render/compute command list, records the policy value written to
// DebugControlReg2 (if any), resets the list, and checks that the value written after
// the reset matches the one written at creation. It also checks that the write is
// present after reset exactly when it was present before.
HWTEST_F(CommandListArbitrationPolicyTest, whenCommandListIsResetThenOriginalThreadArbitrationPolicyIsKept) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    ze_result_t returnValue;
    auto commandList = std::unique_ptr<CommandList>(whitebox_cast(L0::CommandList::create(productFamily,
                                                                                          device,
                                                                                          NEO::EngineGroupType::RenderCompute,
                                                                                          returnValue)));
    // Fatal assertions: everything below dereferences these pointers, so a
    // non-fatal EXPECT would let the test crash on a failed creation.
    ASSERT_NE(nullptr, commandList);
    ASSERT_NE(nullptr, commandList->commandContainer.getCommandStream());

    // Track whether the arbitration register write was seen in each phase. The flags
    // start as false and flip to true on a match, so they never hold an indeterminate value.
    bool foundBeforeReset = false;
    bool foundAfterReset = false;

    // Sentinel that cannot equal any 32-bit register payload.
    uint64_t originalThreadArbitrationPolicy = std::numeric_limits<uint64_t>::max();
    {
        auto *commandStream = commandList->commandContainer.getCommandStream();

        GenCmdList parsedCommandList;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
            parsedCommandList, ptrOffset(commandStream->getCpuBase(), 0),
            commandStream->getUsed()));

        auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
        EXPECT_GE(2u, miLoadImm.size());

        for (auto it : miLoadImm) {
            auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
            if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
                EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin),
                          cmd->getDataDword());
                originalThreadArbitrationPolicy = cmd->getDataDword();
                foundBeforeReset = true;
            }
        }
    }

    commandList->reset();

    {
        // Re-query the stream after reset rather than reusing a pointer obtained before it.
        auto *commandStream = commandList->commandContainer.getCommandStream();
        ASSERT_NE(nullptr, commandStream);

        GenCmdList parsedCommandList;
        ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
            parsedCommandList, ptrOffset(commandStream->getCpuBase(), 0),
            commandStream->getUsed()));

        auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(parsedCommandList.begin(), parsedCommandList.end());
        EXPECT_GE(2u, miLoadImm.size());

        uint64_t newThreadArbitrationPolicy = std::numeric_limits<uint64_t>::max();
        for (auto it : miLoadImm) {
            auto cmd = genCmdCast<MI_LOAD_REGISTER_IMM *>(*it);
            if (cmd->getRegisterOffset() == NEO::DebugControlReg2::address) {
                EXPECT_EQ(NEO::DebugControlReg2::getRegData(NEO::ThreadArbitrationPolicy::RoundRobin),
                          cmd->getDataDword());
                newThreadArbitrationPolicy = cmd->getDataDword();
                EXPECT_EQ(originalThreadArbitrationPolicy, newThreadArbitrationPolicy);
                foundAfterReset = true;
            }
        }
    }

    // A policy write that exists at creation must not disappear on reset, and vice versa.
    EXPECT_EQ(foundBeforeReset, foundAfterReset);
}
HWTEST_F(CommandListAppendLaunchKernel, givenSingleValidWaitEventsAddsSemaphoreToCommandStream) {
using MI_SEMAPHORE_WAIT = typename FamilyType::MI_SEMAPHORE_WAIT;
Mock<::L0::Kernel> kernel;