Add state compute mode tracking

Related-To: NEO-5019

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz 2022-09-20 16:46:15 +00:00 committed by Compute-Runtime-Automation
parent 5c0789aed8
commit 57d35c8932
17 changed files with 652 additions and 4 deletions

View File

@ -323,6 +323,7 @@ struct CommandList : _ze_command_list_handle_t {
bool multiReturnPointCommandList = false;
bool systolicModeSupport = false;
bool pipelineSelectStateTracking = false;
bool stateComputeModeTracking = false;
std::atomic<uint32_t> barrierCounter{0u};
uint32_t latestFlushedBarrierCounter = 0u;

View File

@ -2320,8 +2320,13 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
if (!containsAnyKernel) {
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
finalStreamState = requiredStreamState;
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
if (this->stateComputeModeTracking) {
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
finalStreamState = requiredStreamState;
} else {
finalStreamState = requiredStreamState;
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
}
containsAnyKernel = true;
}

View File

@ -29,6 +29,7 @@ namespace L0 {
// Builds the command container for the requested number of interface descriptors per block and
// caches the optional-feature switches reported by L0HwHelper at construction time.
CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
    // Each switch is sampled once here; later reads use the cached member values.
    this->multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
    this->pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
    this->stateComputeModeTracking = L0HwHelper::enableStateComputeModeTracking();
}
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};

View File

@ -46,6 +46,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
stateComputeModeTracking = L0HwHelper::enableStateComputeModeTracking();
}
ze_result_t CommandQueueImp::destroy() {

View File

@ -69,6 +69,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
bool internalUsage = false;
bool multiReturnPointCommandList = false;
bool pipelineSelectStateTracking = false;
bool stateComputeModeTracking = false;
};
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,

View File

@ -181,6 +181,15 @@ struct CommandQueueHw : public CommandQueueImp {
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal);
// Estimates the bytes needed for a state compute mode command ahead of one command list.
// csrStateCopy is a scratch copy of the CSR stream properties and is modified by the call;
// cmdListRequired / cmdListFinal are the command list's required and final stream states.
inline size_t estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal);
// Programs a state compute mode command into commandStream when the command list's required
// state differs from the tracked csrState, then updates csrState with the command list's final state.
inline void programRequiredStateComputeModeForCommandList(CommandList *commandList,
NEO::LinearStream &commandStream,
NEO::StreamProperties &csrState,
const NEO::StreamProperties &cmdListRequired,
const NEO::StreamProperties &cmdListFinal);
size_t alignedChildStreamPadding{};
};

View File

@ -182,8 +182,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
auto &finalStreamState = commandList->getFinalStreamState();
this->updateOneCmdListPreemptionModeAndCtxStatePreemption(ctx, commandList->getCommandListPreemptionMode(), child);
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
this->programRequiredStateComputeModeForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
@ -655,7 +657,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
linearStreamSizeEstimate += estimateFrontEndCmdSize(ctx.frontEndStateDirty);
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
if (this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
if (this->stateComputeModeTracking || this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
auto streamPropertiesCopy = csr->getStreamProperties();
bool gpgpuEnabledCopy = csr->getPreambleSetFlag();
@ -667,6 +669,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirtyCopy, ctx.engineInstanced, cmdList,
streamPropertiesCopy, requiredStreamState, finalStreamState);
linearStreamSizeEstimate += estimatePipelineSelectCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState, gpgpuEnabledCopy);
linearStreamSizeEstimate += estimateScmCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState);
}
}
@ -1174,6 +1177,54 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(CommandList
csrState.pipelineSelect.setProperties(cmdListFinal.pipelineSelect);
}
// Returns the number of bytes to reserve for a state compute mode command in front of one
// command list, or 0 when tracking is disabled or the required state is already in effect.
//
// csrStateCopy      - scratch copy of the CSR stream properties; updated to the command list's
//                     final state so the next command list is estimated against it.
// cmdListRequired   - state the command list needs before its first kernel.
// cmdListFinal      - state the command list leaves behind.
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
                                                                                const NEO::StreamProperties &cmdListRequired,
                                                                                const NEO::StreamProperties &cmdListFinal) {
    if (this->stateComputeModeTracking) {
        const bool rcsEngine = this->getCsr()->isRcs();
        const size_t scmCmdSize = NEO::EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(device->getHwInfo(), false, rcsEngine);

        auto &scmCopy = csrStateCopy.stateComputeMode;

        // A command is only needed when applying the required state changes the tracked copy.
        scmCopy.setProperties(cmdListRequired.stateComputeMode);
        const size_t requiredSize = scmCopy.isDirty() ? scmCmdSize : 0;

        // Carry the final state forward for the estimation of the following command list.
        scmCopy.setProperties(cmdListFinal.stateComputeMode);
        return requiredSize;
    }
    return 0;
}
// Brings the tracked CSR state compute mode in line with what one command list requires.
// When tracking is enabled and the required state differs from the tracked one, a compute mode
// command is written to commandStream; afterwards the tracked state is set to the command
// list's final state. Does nothing when state compute mode tracking is disabled.
//
// commandList       - command list about to be dispatched; queried for systolic mode support.
// commandStream     - queue stream that receives the compute mode command.
// csrState          - tracked CSR stream properties, updated in place.
// cmdListRequired   - state the command list needs before its first kernel.
// cmdListFinal      - state the command list leaves behind.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandList(CommandList *commandList,
                                                                                  NEO::LinearStream &commandStream,
                                                                                  NEO::StreamProperties &csrState,
                                                                                  const NEO::StreamProperties &cmdListRequired,
                                                                                  const NEO::StreamProperties &cmdListFinal) {
    if (this->stateComputeModeTracking) {
        auto &trackedScm = csrState.stateComputeMode;

        trackedScm.setProperties(cmdListRequired.stateComputeMode);
        if (trackedScm.isDirty()) {
            // Pipeline select arguments mirror the currently tracked systolic mode.
            const auto systolicEnabled = !!csrState.pipelineSelect.systolicMode.value;
            const auto systolicSupported = commandList->getSystolicModeSupport();
            NEO::PipelineSelectArgs selectArgs = {systolicEnabled, false, false, systolicSupported};

            const bool rcsEngine = this->getCsr()->isRcs();
            NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(commandStream, trackedScm, selectArgs,
                                                                                            false, device->getHwInfo(), rcsEngine, nullptr);
        }

        // Whatever the command list ends with becomes the state seen by the next one.
        trackedScm.setProperties(cmdListFinal.stateComputeMode);
    }
}
template <GFXCORE_FAMILY gfxCoreFamily>
bool CommandQueueHw<gfxCoreFamily>::isCleanLeftoverMemoryRequired() {
return false;

View File

@ -31,4 +31,12 @@ bool L0HwHelper::enablePipelineSelectStateTracking() {
return defaultValue;
}
// Reports whether state compute mode tracking is enabled.
// The EnableStateComputeModeTracking debug flag overrides the built-in default (disabled)
// whenever it holds a value other than -1; any non-zero override enables the feature.
bool L0HwHelper::enableStateComputeModeTracking() {
    constexpr bool defaultValue = false;
    const bool overridden = NEO::DebugManager.flags.EnableStateComputeModeTracking.get() != -1;
    return overridden ? (NEO::DebugManager.flags.EnableStateComputeModeTracking.get() != 0) : defaultValue;
}
} // namespace L0

View File

@ -32,6 +32,7 @@ class L0HwHelper {
static L0HwHelper &get(GFXCORE_FAMILY gfxCore);
static bool enableMultiReturnPointCommandList();
static bool enablePipelineSelectStateTracking();
static bool enableStateComputeModeTracking();
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;

View File

@ -126,5 +126,10 @@ void CmdListPipelineSelectStateFixture::setUp() {
ModuleMutableCommandListFixture::setUp();
}
// Enables state compute mode tracking through the debug flag, then runs the base fixture setup.
void CmdListStateComputeModeStateFixture::setUp() {
// The flag is set before the base setUp() runs.
// NOTE(review): presumably because the objects created there sample the flag at construction — confirm.
DebugManager.flags.EnableStateComputeModeTracking.set(1);
ModuleMutableCommandListFixture::setUp();
}
} // namespace ult
} // namespace L0

View File

@ -101,5 +101,21 @@ struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixtur
DebugManagerStateRestore restorer;
};
// Base fixture for state compute mode tracking tests; extends ModuleMutableCommandListFixture
// with its own setUp().
struct CmdListStateComputeModeStateFixture : public ModuleMutableCommandListFixture {
void setUp();
// NOTE(review): presumably restores debug flags changed by the test when the fixture is destroyed — confirm against DebugManagerStateRestore.
DebugManagerStateRestore restorer;
};
// Fixture for thread arbitration policy tracking tests; the shared test body is a template
// over the hardware family type.
struct CmdListThreadArbitrationFixture : public CmdListStateComputeModeStateFixture {
template <typename FamilyType>
void testBody();
};
// Fixture for large GRF mode tracking tests; the shared test body is a template over the
// hardware family type.
struct CmdListLargeGrfFixture : public CmdListStateComputeModeStateFixture {
template <typename FamilyType>
void testBody();
};
} // namespace ult
} // namespace L0

View File

@ -5,6 +5,8 @@
*
*/
#include "shared/source/command_stream/thread_arbitration_policy.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/test/common/helpers/unit_test_helper.h"
#include "shared/test/common/libult/ult_command_stream_receiver.h"
@ -576,5 +578,530 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() {
EXPECT_EQ(0u, pipelineSelectList.size());
}
// Exercises thread arbitration policy tracking. Kernels with different policies are appended to
// the command list and the list is executed on the queue; after every step the test checks the
// tracked required/final/CSR stream properties and counts the STATE_COMPUTE_MODE commands found
// in the stream range written by that step.
// The four scenarios run back to back on the same command list and queue, so each scenario's
// queue-side expectations depend on the CSR state left by the previous one.
template <typename FamilyType>
void CmdListThreadArbitrationFixture::testBody() {
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
using EU_THREAD_SCHEDULING_MODE_OVERRIDE = typename STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE;
const ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
void *currentBuffer = nullptr;
// References to the tracked states, re-read after every append/execute step.
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
auto &cmdListFinalState = commandList->getFinalStreamState();
auto &csrState = commandQueue->csr->getStreamProperties();
auto commandListHandle = commandList->toHandle();
auto &commandListStream = *commandList->commandContainer.getCommandStream();
auto &cmdQueueStream = commandQueue->commandStream;
GenCmdList cmdList;
std::vector<GenCmdList::iterator> stateComputeModeList;
// Stream usage before/after a step; only the bytes in between are parsed.
size_t sizeBefore = 0;
size_t sizeAfter = 0;
auto result = ZE_RESULT_SUCCESS;
// Scenario 1: a single AgeBased kernel.
// Expected: no STATE_COMPUTE_MODE in the command list, one (OLDEST_FIRST) in the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
// Execute and inspect what the queue wrote for this command list.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, csrState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
cmdList.clear();
stateComputeModeList.clear();
commandList->reset();
}
// Scenario 2: RoundRobin kernel followed by an AgeBased kernel.
// Expected: required state stays RoundRobin, final state becomes AgeBased; the second append
// writes one STATE_COMPUTE_MODE (OLDEST_FIRST) into the command list, and execution writes one
// (ROUND_ROBIN) into the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
// Second kernel changes the policy inside the same command list.
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
cmdList.clear();
stateComputeModeList.clear();
// Execute: the CSR state ends at the command list's final policy.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, csrState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
cmdList.clear();
stateComputeModeList.clear();
commandList->reset();
}
// Scenario 3: RoundRobin, AgeBased, then RoundRobinAfterDependency kernels.
// Expected: required state stays RoundRobin while the final state follows the last kernel; the
// second and third appends each write one STATE_COMPUTE_MODE into the command list, and
// execution writes one (ROUND_ROBIN) into the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
// Second kernel: AgeBased.
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
cmdList.clear();
stateComputeModeList.clear();
// Third kernel: RoundRobinAfterDependency.
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
cmdList.clear();
stateComputeModeList.clear();
// Execute: the CSR state ends at RoundRobinAfterDependency.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, csrState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
cmdList.clear();
stateComputeModeList.clear();
commandList->reset();
}
// Scenario 4: a single RoundRobinAfterDependency kernel, the same policy the previous scenario
// left in the CSR state.
// Expected: no STATE_COMPUTE_MODE in either the command list or the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, csrState.stateComputeMode.threadArbitrationPolicy.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
}
}
// Exercises large GRF mode tracking. Kernels with default and large GRF requirements are
// appended to the command list and the list is executed on the queue; after every step the test
// checks the tracked required/final/CSR largeGrfMode values and counts the STATE_COMPUTE_MODE
// commands found in the stream range written by that step.
// The four scenarios run back to back on the same command list and queue, so each scenario's
// queue-side expectations depend on the CSR state left by the previous one.
template <typename FamilyType>
void CmdListLargeGrfFixture::testBody() {
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
const ze_group_count_t groupCount{1, 1, 1};
CmdListKernelLaunchParams launchParams = {};
void *currentBuffer = nullptr;
// References to the tracked states, re-read after every append/execute step.
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
auto &cmdListFinalState = commandList->getFinalStreamState();
auto &csrState = commandQueue->csr->getStreamProperties();
auto commandListHandle = commandList->toHandle();
auto &commandListStream = *commandList->commandContainer.getCommandStream();
auto &cmdQueueStream = commandQueue->commandStream;
GenCmdList cmdList;
std::vector<GenCmdList::iterator> stateComputeModeList;
// Stream usage before/after a step; only the bytes in between are parsed.
size_t sizeBefore = 0;
size_t sizeAfter = 0;
auto result = ZE_RESULT_SUCCESS;
// Scenario 1: a single default-GRF kernel.
// Expected: no STATE_COMPUTE_MODE in the command list, one (large GRF off) in the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(0, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(0, cmdListFinalState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
// Execute and inspect what the queue wrote for this command list.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(0, csrState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_FALSE(stateComputeModeCmd->getLargeGrfMode());
cmdList.clear();
stateComputeModeList.clear();
commandList->reset();
}
// Scenario 2: large-GRF kernel followed by a default-GRF kernel.
// Expected: required state stays large GRF, final state becomes default; the second append
// writes one STATE_COMPUTE_MODE (large GRF off) into the command list, and execution writes one
// (large GRF on) into the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
// Second kernel switches back to the default GRF count inside the same command list.
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(0, cmdListFinalState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_FALSE(stateComputeModeCmd->getLargeGrfMode());
cmdList.clear();
stateComputeModeList.clear();
// Execute: the CSR state ends at the command list's final (default GRF) value.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(0, csrState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_TRUE(stateComputeModeCmd->getLargeGrfMode());
cmdList.clear();
stateComputeModeList.clear();
commandList->reset();
}
// Scenario 3: large-GRF, default-GRF, then large-GRF kernels.
// Expected: required state stays large GRF and the final state follows the last kernel; the
// second and third appends each write one STATE_COMPUTE_MODE into the command list, and
// execution writes one (large GRF on) into the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
// Second kernel: default GRF.
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(0, cmdListFinalState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_FALSE(stateComputeModeCmd->getLargeGrfMode());
cmdList.clear();
stateComputeModeList.clear();
// Third kernel: large GRF again.
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_TRUE(stateComputeModeCmd->getLargeGrfMode());
cmdList.clear();
stateComputeModeList.clear();
// Execute: the CSR state ends with large GRF enabled.
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(1, csrState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
ASSERT_EQ(1u, stateComputeModeList.size());
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
EXPECT_TRUE(stateComputeModeCmd->getLargeGrfMode());
cmdList.clear();
stateComputeModeList.clear();
commandList->reset();
}
// Scenario 4: a single large-GRF kernel, the same mode the previous scenario left in the CSR state.
// Expected: no STATE_COMPUTE_MODE in either the command list or the queue stream.
{
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
sizeBefore = commandListStream.getUsed();
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = commandListStream.getUsed();
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
cmdList.clear();
stateComputeModeList.clear();
commandList->close();
sizeBefore = cmdQueueStream.getUsed();
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
sizeAfter = cmdQueueStream.getUsed();
EXPECT_EQ(1, csrState.stateComputeMode.largeGrfMode.value);
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
currentBuffer,
(sizeAfter - sizeBefore)));
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
EXPECT_EQ(0u, stateComputeModeList.size());
}
}
} // namespace ult
} // namespace L0

View File

@ -65,6 +65,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
using BaseClass::patternAllocations;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::requiredStreamState;
using BaseClass::stateComputeModeTracking;
using BaseClass::unifiedMemoryControls;
using BaseClass::updateStreamProperties;
@ -126,6 +127,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
using BaseClass::partitionCount;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::requiredStreamState;
using BaseClass::stateComputeModeTracking;
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
};
@ -151,6 +153,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
using BaseClass::nonImmediateLogicalStateHelper;
using BaseClass::partitionCount;
using BaseClass::pipelineSelectStateTracking;
using BaseClass::stateComputeModeTracking;
WhiteBox(Device *device);
~WhiteBox() override;

View File

@ -37,6 +37,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
using CommandQueue::multiReturnPointCommandList;
using CommandQueue::partitionCount;
using CommandQueue::pipelineSelectStateTracking;
using CommandQueue::stateComputeModeTracking;
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
const ze_command_queue_desc_t *desc);
@ -70,6 +71,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
using L0::CommandQueue::partitionCount;
using L0::CommandQueue::pipelineSelectStateTracking;
using L0::CommandQueue::preemptionCmdSyncProgramming;
using L0::CommandQueue::stateComputeModeTracking;
using L0::CommandQueueImp::csr;
using typename BaseClass::CommandListExecutionContext;

View File

@ -552,5 +552,21 @@ HWTEST2_F(CmdListPipelineSelectStateTest,
testBodyShareStateImmediateRegular<FamilyType>();
}
// Registers the thread arbitration tracking test; the body lives in the fixture's testBody().
using CmdListThreadArbitrationTest = Test<CmdListThreadArbitrationFixture>;
// Product matcher passed to HWTEST2_F below; it names IGFX_PVC only.
using ThreadArbitrationSupport = IsProduct<IGFX_PVC>;
HWTEST2_F(CmdListThreadArbitrationTest,
givenAppendThreadArbitrationKernelToCommandListWhenExecutingCommandListThenStateComputeModeStateIsTrackedCorrectly, ThreadArbitrationSupport) {
testBody<FamilyType>();
}
// Registers the large GRF tracking test; the body lives in the fixture's testBody().
using CmdListLargeGrfTest = Test<CmdListLargeGrfFixture>;
// Product matcher passed to HWTEST2_F below; it names IGFX_XE_HP_SDV, IGFX_DG2 and IGFX_PVC.
using LargeGrfSupport = IsAnyProducts<IGFX_XE_HP_SDV, IGFX_DG2, IGFX_PVC>;
HWTEST2_F(CmdListLargeGrfTest,
givenAppendLargeGrfKernelToCommandListWhenExecutingCommandListThenStateComputeModeStateIsTrackedCorrectly, LargeGrfSupport) {
testBody<FamilyType>();
}
} // namespace ult
} // namespace L0

View File

@ -479,6 +479,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, ProgramExtendedPipeControlPriorToNonPipelinedSta
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: override to given memory region for all allocations")
DECLARE_DEBUG_VARIABLE(int32_t, MultiReturnPointCommandList, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag creates multiple return point from List to Queue for Front End reconfiguration on Queue buffer for single List")
DECLARE_DEBUG_VARIABLE(int32_t, EnablePipelineSelectTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables optimization that limits number of pipeline select dispatched by command lists")
DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables tracking state compute mode changes in command lists")
/* Binary Cache */
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")

View File

@ -168,6 +168,7 @@ CsrDispatchMode = 0
OverrideDefaultFP64Settings = -1
RenderCompressedImagesEnabled = -1
RenderCompressedBuffersEnabled = -1
EnableStateComputeModeTracking = -1
EnableUsmConcurrentAccessSupport = 0
EnableSharedSystemUsmSupport = -1
EnablePassInlineData = -1