Add state compute mode tracking
Related-To: NEO-5019
Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
parent
5c0789aed8
commit
57d35c8932
|
@ -323,6 +323,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||||
bool multiReturnPointCommandList = false;
|
bool multiReturnPointCommandList = false;
|
||||||
bool systolicModeSupport = false;
|
bool systolicModeSupport = false;
|
||||||
bool pipelineSelectStateTracking = false;
|
bool pipelineSelectStateTracking = false;
|
||||||
|
bool stateComputeModeTracking = false;
|
||||||
|
|
||||||
std::atomic<uint32_t> barrierCounter{0u};
|
std::atomic<uint32_t> barrierCounter{0u};
|
||||||
uint32_t latestFlushedBarrierCounter = 0u;
|
uint32_t latestFlushedBarrierCounter = 0u;
|
||||||
|
|
|
@ -2320,8 +2320,13 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||||
if (!containsAnyKernel) {
|
if (!containsAnyKernel) {
|
||||||
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
|
requiredStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, true, -1, hwInfo);
|
||||||
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
|
requiredStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
|
||||||
|
if (this->stateComputeModeTracking) {
|
||||||
|
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
|
||||||
|
finalStreamState = requiredStreamState;
|
||||||
|
} else {
|
||||||
finalStreamState = requiredStreamState;
|
finalStreamState = requiredStreamState;
|
||||||
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
|
requiredStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
|
||||||
|
}
|
||||||
containsAnyKernel = true;
|
containsAnyKernel = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -29,6 +29,7 @@ namespace L0 {
|
||||||
// Builds a command list: forwards numIddsPerBlock to the command container and stores the
// three L0HwHelper feature toggles (multi return point command list, pipeline select state
// tracking, state compute mode tracking) in the members of the same name.
CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
    this->multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
    this->pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
    this->stateComputeModeTracking = L0HwHelper::enableStateComputeModeTracking();
}
|
||||||
|
|
||||||
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};
|
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};
|
||||||
|
|
|
@ -46,6 +46,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
|
||||||
|
|
||||||
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
|
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
|
||||||
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
|
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
|
||||||
|
stateComputeModeTracking = L0HwHelper::enableStateComputeModeTracking();
|
||||||
}
|
}
|
||||||
|
|
||||||
ze_result_t CommandQueueImp::destroy() {
|
ze_result_t CommandQueueImp::destroy() {
|
||||||
|
|
|
@ -69,6 +69,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
||||||
bool internalUsage = false;
|
bool internalUsage = false;
|
||||||
bool multiReturnPointCommandList = false;
|
bool multiReturnPointCommandList = false;
|
||||||
bool pipelineSelectStateTracking = false;
|
bool pipelineSelectStateTracking = false;
|
||||||
|
bool stateComputeModeTracking = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,
|
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,
|
||||||
|
|
|
@ -181,6 +181,15 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||||
const NEO::StreamProperties &cmdListRequired,
|
const NEO::StreamProperties &cmdListRequired,
|
||||||
const NEO::StreamProperties &cmdListFinal);
|
const NEO::StreamProperties &cmdListFinal);
|
||||||
|
|
||||||
|
inline size_t estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
|
||||||
|
const NEO::StreamProperties &cmdListRequired,
|
||||||
|
const NEO::StreamProperties &cmdListFinal);
|
||||||
|
inline void programRequiredStateComputeModeForCommandList(CommandList *commandList,
|
||||||
|
NEO::LinearStream &commandStream,
|
||||||
|
NEO::StreamProperties &csrState,
|
||||||
|
const NEO::StreamProperties &cmdListRequired,
|
||||||
|
const NEO::StreamProperties &cmdListFinal);
|
||||||
|
|
||||||
size_t alignedChildStreamPadding{};
|
size_t alignedChildStreamPadding{};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -182,8 +182,10 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||||
auto &finalStreamState = commandList->getFinalStreamState();
|
auto &finalStreamState = commandList->getFinalStreamState();
|
||||||
|
|
||||||
this->updateOneCmdListPreemptionModeAndCtxStatePreemption(ctx, commandList->getCommandListPreemptionMode(), child);
|
this->updateOneCmdListPreemptionModeAndCtxStatePreemption(ctx, commandList->getCommandListPreemptionMode(), child);
|
||||||
|
|
||||||
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||||
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
|
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||||
|
this->programRequiredStateComputeModeForCommandList(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||||
|
|
||||||
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
|
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
|
||||||
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
|
this->programOneCmdListBatchBufferStart(commandList, child, ctx);
|
||||||
|
@ -655,7 +657,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
|
||||||
linearStreamSizeEstimate += estimateFrontEndCmdSize(ctx.frontEndStateDirty);
|
linearStreamSizeEstimate += estimateFrontEndCmdSize(ctx.frontEndStateDirty);
|
||||||
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
|
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
|
||||||
|
|
||||||
if (this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
|
if (this->stateComputeModeTracking || this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
|
||||||
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
|
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
|
||||||
auto streamPropertiesCopy = csr->getStreamProperties();
|
auto streamPropertiesCopy = csr->getStreamProperties();
|
||||||
bool gpgpuEnabledCopy = csr->getPreambleSetFlag();
|
bool gpgpuEnabledCopy = csr->getPreambleSetFlag();
|
||||||
|
@ -667,6 +669,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
|
||||||
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirtyCopy, ctx.engineInstanced, cmdList,
|
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirtyCopy, ctx.engineInstanced, cmdList,
|
||||||
streamPropertiesCopy, requiredStreamState, finalStreamState);
|
streamPropertiesCopy, requiredStreamState, finalStreamState);
|
||||||
linearStreamSizeEstimate += estimatePipelineSelectCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState, gpgpuEnabledCopy);
|
linearStreamSizeEstimate += estimatePipelineSelectCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState, gpgpuEnabledCopy);
|
||||||
|
linearStreamSizeEstimate += estimateScmCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1174,6 +1177,54 @@ void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(CommandList
|
||||||
csrState.pipelineSelect.setProperties(cmdListFinal.pipelineSelect);
|
csrState.pipelineSelect.setProperties(cmdListFinal.pipelineSelect);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
size_t CommandQueueHw<gfxCoreFamily>::estimateScmCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
|
||||||
|
const NEO::StreamProperties &cmdListRequired,
|
||||||
|
const NEO::StreamProperties &cmdListFinal) {
|
||||||
|
if (!this->stateComputeModeTracking) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t estimatedSize = 0;
|
||||||
|
|
||||||
|
bool isRcs = this->getCsr()->isRcs();
|
||||||
|
size_t singleScmCmdSize = NEO::EncodeComputeMode<GfxFamily>::getCmdSizeForComputeMode(device->getHwInfo(), false, isRcs);
|
||||||
|
|
||||||
|
csrStateCopy.stateComputeMode.setProperties(cmdListRequired.stateComputeMode);
|
||||||
|
if (csrStateCopy.stateComputeMode.isDirty()) {
|
||||||
|
estimatedSize += singleScmCmdSize;
|
||||||
|
}
|
||||||
|
csrStateCopy.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
|
||||||
|
|
||||||
|
return estimatedSize;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
|
void CommandQueueHw<gfxCoreFamily>::programRequiredStateComputeModeForCommandList(CommandList *commandList,
|
||||||
|
NEO::LinearStream &commandStream,
|
||||||
|
NEO::StreamProperties &csrState,
|
||||||
|
const NEO::StreamProperties &cmdListRequired,
|
||||||
|
const NEO::StreamProperties &cmdListFinal) {
|
||||||
|
if (!this->stateComputeModeTracking) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
csrState.stateComputeMode.setProperties(cmdListRequired.stateComputeMode);
|
||||||
|
|
||||||
|
if (csrState.stateComputeMode.isDirty()) {
|
||||||
|
NEO::PipelineSelectArgs pipelineSelectArgs = {
|
||||||
|
!!csrState.pipelineSelect.systolicMode.value,
|
||||||
|
false,
|
||||||
|
false,
|
||||||
|
commandList->getSystolicModeSupport()};
|
||||||
|
|
||||||
|
bool isRcs = this->getCsr()->isRcs();
|
||||||
|
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(commandStream, csrState.stateComputeMode, pipelineSelectArgs,
|
||||||
|
false, device->getHwInfo(), isRcs, nullptr);
|
||||||
|
}
|
||||||
|
csrState.stateComputeMode.setProperties(cmdListFinal.stateComputeMode);
|
||||||
|
}
|
||||||
|
|
||||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||||
bool CommandQueueHw<gfxCoreFamily>::isCleanLeftoverMemoryRequired() {
|
bool CommandQueueHw<gfxCoreFamily>::isCleanLeftoverMemoryRequired() {
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -31,4 +31,12 @@ bool L0HwHelper::enablePipelineSelectStateTracking() {
|
||||||
return defaultValue;
|
return defaultValue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool L0HwHelper::enableStateComputeModeTracking() {
|
||||||
|
constexpr bool defaultValue = false;
|
||||||
|
if (NEO::DebugManager.flags.EnableStateComputeModeTracking.get() != -1) {
|
||||||
|
return !!NEO::DebugManager.flags.EnableStateComputeModeTracking.get();
|
||||||
|
}
|
||||||
|
return defaultValue;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -32,6 +32,7 @@ class L0HwHelper {
|
||||||
static L0HwHelper &get(GFXCORE_FAMILY gfxCore);
|
static L0HwHelper &get(GFXCORE_FAMILY gfxCore);
|
||||||
static bool enableMultiReturnPointCommandList();
|
static bool enableMultiReturnPointCommandList();
|
||||||
static bool enablePipelineSelectStateTracking();
|
static bool enablePipelineSelectStateTracking();
|
||||||
|
static bool enableStateComputeModeTracking();
|
||||||
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
|
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
|
||||||
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
|
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
|
||||||
|
|
||||||
|
|
|
@ -126,5 +126,10 @@ void CmdListPipelineSelectStateFixture::setUp() {
|
||||||
ModuleMutableCommandListFixture::setUp();
|
ModuleMutableCommandListFixture::setUp();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Sets the EnableStateComputeModeTracking debug flag to 1 and then runs the base fixture setup.
// NOTE(review): the flag is set before the base setUp() presumably because the objects that
// setUp() creates read the flag while they are constructed - confirm against the base fixture.
void CmdListStateComputeModeStateFixture::setUp() {
    constexpr int trackingEnabled = 1;
    DebugManager.flags.EnableStateComputeModeTracking.set(trackingEnabled);

    ModuleMutableCommandListFixture::setUp();
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -101,5 +101,21 @@ struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixtur
|
||||||
DebugManagerStateRestore restorer;
|
DebugManagerStateRestore restorer;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct CmdListStateComputeModeStateFixture : public ModuleMutableCommandListFixture {
|
||||||
|
void setUp();
|
||||||
|
|
||||||
|
DebugManagerStateRestore restorer;
|
||||||
|
};
|
||||||
|
|
||||||
|
// Fixture for the thread arbitration policy tracking test.
// testBody() is a template over the hardware family type and is defined out of line.
struct CmdListThreadArbitrationFixture : public CmdListStateComputeModeStateFixture {
    template <typename FamilyType>
    void testBody();
};
|
||||||
|
|
||||||
|
// Fixture for the large GRF mode tracking test.
// testBody() is a template over the hardware family type and is defined out of line.
struct CmdListLargeGrfFixture : public CmdListStateComputeModeStateFixture {
    template <typename FamilyType>
    void testBody();
};
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -5,6 +5,8 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#include "shared/source/command_stream/thread_arbitration_policy.h"
|
||||||
|
#include "shared/source/kernel/grf_config.h"
|
||||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||||
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
#include "shared/test/common/libult/ult_command_stream_receiver.h"
|
||||||
|
|
||||||
|
@ -576,5 +578,530 @@ void CmdListPipelineSelectStateFixture::testBodyShareStateImmediateRegular() {
|
||||||
EXPECT_EQ(0u, pipelineSelectList.size());
|
EXPECT_EQ(0u, pipelineSelectList.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename FamilyType>
|
||||||
|
void CmdListThreadArbitrationFixture::testBody() {
|
||||||
|
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||||
|
using EU_THREAD_SCHEDULING_MODE_OVERRIDE = typename STATE_COMPUTE_MODE::EU_THREAD_SCHEDULING_MODE_OVERRIDE;
|
||||||
|
|
||||||
|
const ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
|
|
||||||
|
void *currentBuffer = nullptr;
|
||||||
|
|
||||||
|
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
|
||||||
|
auto &cmdListFinalState = commandList->getFinalStreamState();
|
||||||
|
auto &csrState = commandQueue->csr->getStreamProperties();
|
||||||
|
|
||||||
|
auto commandListHandle = commandList->toHandle();
|
||||||
|
|
||||||
|
auto &commandListStream = *commandList->commandContainer.getCommandStream();
|
||||||
|
auto &cmdQueueStream = commandQueue->commandStream;
|
||||||
|
|
||||||
|
GenCmdList cmdList;
|
||||||
|
std::vector<GenCmdList::iterator> stateComputeModeList;
|
||||||
|
size_t sizeBefore = 0;
|
||||||
|
size_t sizeAfter = 0;
|
||||||
|
auto result = ZE_RESULT_SUCCESS;
|
||||||
|
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
|
||||||
|
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, csrState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, csrState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobin;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::AgeBased;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::AgeBased, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_OLDEST_FIRST, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobin, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_STALL_BASED_ROUND_ROBIN, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, csrState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_EQ(EU_THREAD_SCHEDULING_MODE_OVERRIDE::EU_THREAD_SCHEDULING_MODE_OVERRIDE_ROUND_ROBIN, stateComputeModeCmd->getEuThreadSchedulingModeOverride());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->reset();
|
||||||
|
}
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.threadArbitrationPolicy = NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, cmdlistRequiredState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, cmdListFinalState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(NEO::ThreadArbitrationPolicy::RoundRobinAfterDependency, csrState.stateComputeMode.threadArbitrationPolicy.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename FamilyType>
|
||||||
|
void CmdListLargeGrfFixture::testBody() {
|
||||||
|
using STATE_COMPUTE_MODE = typename FamilyType::STATE_COMPUTE_MODE;
|
||||||
|
|
||||||
|
const ze_group_count_t groupCount{1, 1, 1};
|
||||||
|
CmdListKernelLaunchParams launchParams = {};
|
||||||
|
|
||||||
|
void *currentBuffer = nullptr;
|
||||||
|
|
||||||
|
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
|
||||||
|
auto &cmdListFinalState = commandList->getFinalStreamState();
|
||||||
|
auto &csrState = commandQueue->csr->getStreamProperties();
|
||||||
|
|
||||||
|
auto commandListHandle = commandList->toHandle();
|
||||||
|
|
||||||
|
auto &commandListStream = *commandList->commandContainer.getCommandStream();
|
||||||
|
auto &cmdQueueStream = commandQueue->commandStream;
|
||||||
|
|
||||||
|
GenCmdList cmdList;
|
||||||
|
std::vector<GenCmdList::iterator> stateComputeModeList;
|
||||||
|
size_t sizeBefore = 0;
|
||||||
|
size_t sizeAfter = 0;
|
||||||
|
auto result = ZE_RESULT_SUCCESS;
|
||||||
|
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||||
|
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(0, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
|
||||||
|
EXPECT_EQ(0, cmdListFinalState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(0, csrState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_FALSE(stateComputeModeCmd->getLargeGrfMode());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
|
||||||
|
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
|
||||||
|
EXPECT_EQ(0, cmdListFinalState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_FALSE(stateComputeModeCmd->getLargeGrfMode());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(0, csrState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_TRUE(stateComputeModeCmd->getLargeGrfMode());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
|
||||||
|
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::DefaultGrfNumber;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
|
||||||
|
EXPECT_EQ(0, cmdListFinalState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
auto stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_FALSE(stateComputeModeCmd->getLargeGrfMode());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
|
||||||
|
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_TRUE(stateComputeModeCmd->getLargeGrfMode());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, csrState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
ASSERT_EQ(1u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
stateComputeModeCmd = genCmdCast<STATE_COMPUTE_MODE *>(*stateComputeModeList[0]);
|
||||||
|
EXPECT_TRUE(stateComputeModeCmd->getLargeGrfMode());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->reset();
|
||||||
|
}
|
||||||
|
{
|
||||||
|
mockKernelImmData->kernelDescriptor->kernelAttributes.numGrfRequired = GrfConfig::LargeGrfNumber;
|
||||||
|
sizeBefore = commandListStream.getUsed();
|
||||||
|
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = commandListStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, cmdlistRequiredState.stateComputeMode.largeGrfMode.value);
|
||||||
|
EXPECT_EQ(1, cmdListFinalState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||||
|
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
|
||||||
|
cmdList.clear();
|
||||||
|
stateComputeModeList.clear();
|
||||||
|
commandList->close();
|
||||||
|
|
||||||
|
sizeBefore = cmdQueueStream.getUsed();
|
||||||
|
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||||
|
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||||
|
sizeAfter = cmdQueueStream.getUsed();
|
||||||
|
|
||||||
|
EXPECT_EQ(1, csrState.stateComputeMode.largeGrfMode.value);
|
||||||
|
|
||||||
|
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||||
|
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||||
|
currentBuffer,
|
||||||
|
(sizeAfter - sizeBefore)));
|
||||||
|
stateComputeModeList = findAll<STATE_COMPUTE_MODE *>(cmdList.begin(), cmdList.end());
|
||||||
|
EXPECT_EQ(0u, stateComputeModeList.size());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -65,6 +65,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||||
using BaseClass::patternAllocations;
|
using BaseClass::patternAllocations;
|
||||||
using BaseClass::pipelineSelectStateTracking;
|
using BaseClass::pipelineSelectStateTracking;
|
||||||
using BaseClass::requiredStreamState;
|
using BaseClass::requiredStreamState;
|
||||||
|
using BaseClass::stateComputeModeTracking;
|
||||||
using BaseClass::unifiedMemoryControls;
|
using BaseClass::unifiedMemoryControls;
|
||||||
using BaseClass::updateStreamProperties;
|
using BaseClass::updateStreamProperties;
|
||||||
|
|
||||||
|
@ -126,6 +127,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||||
using BaseClass::partitionCount;
|
using BaseClass::partitionCount;
|
||||||
using BaseClass::pipelineSelectStateTracking;
|
using BaseClass::pipelineSelectStateTracking;
|
||||||
using BaseClass::requiredStreamState;
|
using BaseClass::requiredStreamState;
|
||||||
|
using BaseClass::stateComputeModeTracking;
|
||||||
|
|
||||||
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
|
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
|
||||||
};
|
};
|
||||||
|
@ -151,6 +153,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
|
||||||
using BaseClass::nonImmediateLogicalStateHelper;
|
using BaseClass::nonImmediateLogicalStateHelper;
|
||||||
using BaseClass::partitionCount;
|
using BaseClass::partitionCount;
|
||||||
using BaseClass::pipelineSelectStateTracking;
|
using BaseClass::pipelineSelectStateTracking;
|
||||||
|
using BaseClass::stateComputeModeTracking;
|
||||||
|
|
||||||
WhiteBox(Device *device);
|
WhiteBox(Device *device);
|
||||||
~WhiteBox() override;
|
~WhiteBox() override;
|
||||||
|
|
|
@ -37,6 +37,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
||||||
using CommandQueue::multiReturnPointCommandList;
|
using CommandQueue::multiReturnPointCommandList;
|
||||||
using CommandQueue::partitionCount;
|
using CommandQueue::partitionCount;
|
||||||
using CommandQueue::pipelineSelectStateTracking;
|
using CommandQueue::pipelineSelectStateTracking;
|
||||||
|
using CommandQueue::stateComputeModeTracking;
|
||||||
|
|
||||||
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
|
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
|
||||||
const ze_command_queue_desc_t *desc);
|
const ze_command_queue_desc_t *desc);
|
||||||
|
@ -70,6 +71,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||||
using L0::CommandQueue::partitionCount;
|
using L0::CommandQueue::partitionCount;
|
||||||
using L0::CommandQueue::pipelineSelectStateTracking;
|
using L0::CommandQueue::pipelineSelectStateTracking;
|
||||||
using L0::CommandQueue::preemptionCmdSyncProgramming;
|
using L0::CommandQueue::preemptionCmdSyncProgramming;
|
||||||
|
using L0::CommandQueue::stateComputeModeTracking;
|
||||||
using L0::CommandQueueImp::csr;
|
using L0::CommandQueueImp::csr;
|
||||||
using typename BaseClass::CommandListExecutionContext;
|
using typename BaseClass::CommandListExecutionContext;
|
||||||
|
|
||||||
|
|
|
@ -552,5 +552,21 @@ HWTEST2_F(CmdListPipelineSelectStateTest,
|
||||||
testBodyShareStateImmediateRegular<FamilyType>();
|
testBodyShareStateImmediateRegular<FamilyType>();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Test suite type built from CmdListThreadArbitrationFixture via Test<>.
using CmdListThreadArbitrationTest = Test<CmdListThreadArbitrationFixture>;
|
||||||
|
|
||||||
|
// Product matcher handed to HWTEST2_F below as its third argument.
// NOTE(review): presumably limits this test to IGFX_PVC hardware - confirm against IsProduct<>.
using ThreadArbitrationSupport = IsProduct<IGFX_PVC>;
|
||||||
|
// The test body only forwards to testBody<FamilyType>(); no setup or expectation is
// written here. testBody is not defined in this test - presumably it is a member of
// CmdListThreadArbitrationFixture (TODO confirm).
HWTEST2_F(CmdListThreadArbitrationTest,
|
||||||
|
givenAppendThreadArbitrationKernelToCommandListWhenExecutingCommandListThenStateComputeModeStateIsTrackedCorrectly, ThreadArbitrationSupport) {
|
||||||
|
testBody<FamilyType>();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test suite type built from CmdListLargeGrfFixture via Test<>.
using CmdListLargeGrfTest = Test<CmdListLargeGrfFixture>;
|
||||||
|
|
||||||
|
// Product matcher handed to HWTEST2_F below as its third argument.
// NOTE(review): presumably limits this test to the three listed products
// (IGFX_XE_HP_SDV, IGFX_DG2, IGFX_PVC) - confirm against IsAnyProducts<>.
using LargeGrfSupport = IsAnyProducts<IGFX_XE_HP_SDV, IGFX_DG2, IGFX_PVC>;
|
||||||
|
// The test body only forwards to testBody<FamilyType>(); no setup or expectation is
// written here. testBody is not defined in this test - presumably it is a member of
// CmdListLargeGrfFixture (TODO confirm).
HWTEST2_F(CmdListLargeGrfTest,
|
||||||
|
givenAppendLargeGrfKernelToCommandListWhenExecutingCommandListThenStateComputeModeStateIsTrackedCorrectly, LargeGrfSupport) {
|
||||||
|
testBody<FamilyType>();
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace ult
|
} // namespace ult
|
||||||
} // namespace L0
|
} // namespace L0
|
||||||
|
|
|
@ -479,6 +479,6 @@ DECLARE_DEBUG_VARIABLE(int32_t, ProgramExtendedPipeControlPriorToNonPipelinedSta
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: override to given memory region for all allocations")
|
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: override to given memory region for all allocations")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, MultiReturnPointCommandList, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag creates multiple return point from List to Queue for Front End reconfiguration on Queue buffer for single List")
|
DECLARE_DEBUG_VARIABLE(int32_t, MultiReturnPointCommandList, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag creates multiple return point from List to Queue for Front End reconfiguration on Queue buffer for single List")
|
||||||
DECLARE_DEBUG_VARIABLE(int32_t, EnablePipelineSelectTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables optimization that limits number of pipeline select dispatched by command lists")
|
DECLARE_DEBUG_VARIABLE(int32_t, EnablePipelineSelectTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables optimization that limits number of pipeline select dispatched by command lists")
|
||||||
|
DECLARE_DEBUG_VARIABLE(int32_t, EnableStateComputeModeTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables tracking state compute mode changes in command lists")
|
||||||
/* Binary Cache */
|
/* Binary Cache */
|
||||||
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")
|
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")
|
||||||
|
|
|
@ -168,6 +168,7 @@ CsrDispatchMode = 0
|
||||||
OverrideDefaultFP64Settings = -1
|
OverrideDefaultFP64Settings = -1
|
||||||
RenderCompressedImagesEnabled = -1
|
RenderCompressedImagesEnabled = -1
|
||||||
RenderCompressedBuffersEnabled = -1
|
RenderCompressedBuffersEnabled = -1
|
||||||
|
EnableStateComputeModeTracking = -1
|
||||||
EnableUsmConcurrentAccessSupport = 0
|
EnableUsmConcurrentAccessSupport = 0
|
||||||
EnableSharedSystemUsmSupport = -1
|
EnableSharedSystemUsmSupport = -1
|
||||||
EnablePassInlineData = -1
|
EnablePassInlineData = -1
|
||||||
|
|
Loading…
Reference in New Issue