mirror of
https://github.com/intel/compute-runtime.git
synced 2025-09-10 12:53:42 +08:00
Add pipeline select state tracking
This optimization removes the pipeline select command from the command list preamble and presents the required state to the command queue, which performs the necessary state update. The code is disabled by default and is available under a debug key. Related-To: NEO-5019 Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:

committed by
Compute-Runtime-Automation

parent
7aecea534f
commit
e960802e33
@ -276,6 +276,10 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
void makeResidentAndMigrate(bool);
|
||||
void migrateSharedAllocations();
|
||||
|
||||
bool getSystolicModeSupport() const {
|
||||
return systolicModeSupport;
|
||||
}
|
||||
|
||||
ze_context_handle_t hContext = nullptr;
|
||||
std::vector<Kernel *> printfKernelContainer;
|
||||
CommandQueue *cmdQImmediate = nullptr;
|
||||
@ -318,6 +322,7 @@ struct CommandList : _ze_command_list_handle_t {
|
||||
bool performMemoryPrefetch = false;
|
||||
bool multiReturnPointCommandList = false;
|
||||
bool systolicModeSupport = false;
|
||||
bool pipelineSelectStateTracking = false;
|
||||
};
|
||||
|
||||
using CommandListAllocatorFn = CommandList *(*)(uint32_t);
|
||||
|
@ -140,7 +140,11 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::initialize(Device *device, NEO
|
||||
commandContainer.setReservedSshSize(getReserveSshSize());
|
||||
DeviceImp *deviceImp = static_cast<DeviceImp *>(device);
|
||||
auto returnValue = commandContainer.initialize(deviceImp->getActiveDevice(), deviceImp->allocationsForReuse.get(), !isCopyOnly());
|
||||
commandContainer.systolicModeSupport = this->systolicModeSupport;
|
||||
if (!this->pipelineSelectStateTracking) {
|
||||
// allow systolic support set in container when tracking disabled
|
||||
// setting systolic support allows dispatching untracked command in legacy mode
|
||||
commandContainer.systolicModeSupport = this->systolicModeSupport;
|
||||
}
|
||||
|
||||
ze_result_t returnType = parseErrorCode(returnValue);
|
||||
if (returnType == ZE_RESULT_SUCCESS) {
|
||||
@ -2323,10 +2327,21 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||
containsAnyKernel = true;
|
||||
}
|
||||
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
|
||||
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
|
||||
auto logicalStateHelperBlock = !getLogicalStateHelper();
|
||||
|
||||
finalStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
|
||||
if (this->pipelineSelectStateTracking && finalStreamState.pipelineSelect.isDirty() && logicalStateHelperBlock) {
|
||||
NEO::PipelineSelectArgs pipelineSelectArgs;
|
||||
pipelineSelectArgs.systolicPipelineSelectMode = kernelAttributes.flags.usesSystolicPipelineSelectMode;
|
||||
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeSupport;
|
||||
|
||||
NEO::PreambleHelper<GfxFamily>::programPipelineSelect(commandContainer.getCommandStream(),
|
||||
pipelineSelectArgs,
|
||||
hwInfo);
|
||||
}
|
||||
|
||||
finalStreamState.frontEndState.setProperties(isCooperative, kernelAttributes.flags.requiresDisabledEUFusion, disableOverdispatch, -1, hwInfo);
|
||||
bool isPatchingVfeStateAllowed = NEO::DebugManager.flags.AllowPatchingVfeStateInCommandLists.get();
|
||||
if (finalStreamState.frontEndState.isDirty() && logicalStateHelperBlock) {
|
||||
if (isPatchingVfeStateAllowed) {
|
||||
auto pVfeStateAddress = NEO::PreambleHelper<GfxFamily>::getSpaceForVfeState(commandContainer.getCommandStream(), hwInfo, engineGroupType);
|
||||
@ -2348,14 +2363,15 @@ void CommandListCoreFamily<gfxCoreFamily>::updateStreamProperties(Kernel &kernel
|
||||
}
|
||||
|
||||
finalStreamState.stateComputeMode.setProperties(false, kernelAttributes.numGrfRequired, kernelAttributes.threadArbitrationPolicy, device->getDevicePreemptionMode(), hwInfo);
|
||||
|
||||
if (finalStreamState.stateComputeMode.isDirty() && logicalStateHelperBlock) {
|
||||
bool isRcs = (this->engineGroupType == NEO::EngineGroupType::RenderCompute);
|
||||
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
|
||||
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, {}, false, hwInfo, isRcs, nullptr);
|
||||
}
|
||||
NEO::PipelineSelectArgs pipelineSelectArgs;
|
||||
pipelineSelectArgs.systolicPipelineSelectMode = kernelAttributes.flags.usesSystolicPipelineSelectMode;
|
||||
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeSupport;
|
||||
|
||||
finalStreamState.pipelineSelect.setProperties(true, false, kernelAttributes.flags.usesSystolicPipelineSelectMode, hwInfo);
|
||||
NEO::EncodeComputeMode<GfxFamily>::programComputeModeCommandWithSynchronization(
|
||||
*commandContainer.getCommandStream(), finalStreamState.stateComputeMode, pipelineSelectArgs, false, hwInfo, isRcs, nullptr);
|
||||
}
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -28,6 +28,7 @@ namespace L0 {
|
||||
|
||||
CommandList::CommandList(uint32_t numIddsPerBlock) : commandContainer(numIddsPerBlock) {
|
||||
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
|
||||
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
|
||||
}
|
||||
|
||||
CommandListAllocatorFn commandListFactory[IGFX_MAX_PRODUCT] = {};
|
||||
|
@ -45,6 +45,7 @@ CommandQueueImp::CommandQueueImp(Device *device, NEO::CommandStreamReceiver *csr
|
||||
}
|
||||
|
||||
multiReturnPointCommandList = L0HwHelper::enableMultiReturnPointCommandList();
|
||||
pipelineSelectStateTracking = L0HwHelper::enablePipelineSelectStateTracking();
|
||||
}
|
||||
|
||||
ze_result_t CommandQueueImp::destroy() {
|
||||
|
@ -68,6 +68,7 @@ struct CommandQueue : _ze_command_queue_handle_t {
|
||||
bool isCopyOnlyCommandQueue = false;
|
||||
bool internalUsage = false;
|
||||
bool multiReturnPointCommandList = false;
|
||||
bool pipelineSelectStateTracking = false;
|
||||
};
|
||||
|
||||
using CommandQueueAllocatorFn = CommandQueue *(*)(Device *device, NEO::CommandStreamReceiver *csr,
|
||||
|
@ -44,7 +44,6 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
size_t estimateFrontEndCmdSize();
|
||||
size_t estimateFrontEndCmdSize(bool isFrontEndDirty);
|
||||
|
||||
size_t estimatePipelineSelect();
|
||||
void programPipelineSelectIfGpgpuDisabled(NEO::LinearStream &commandStream);
|
||||
|
||||
MOCKABLE_VIRTUAL void handleScratchSpace(NEO::HeapContainer &heapContainer,
|
||||
@ -171,7 +170,16 @@ struct CommandQueueHw : public CommandQueueImp {
|
||||
inline void updateTaskCountAndPostSync(bool isDispatchTaskCountPostSyncRequired);
|
||||
inline ze_result_t waitForCommandQueueCompletionAndCleanHeapContainer();
|
||||
inline ze_result_t handleSubmissionAndCompletionResults(NEO::SubmissionStatus submitRet, ze_result_t completionRet);
|
||||
inline void updatePipelineSelectState(CommandList *commandList);
|
||||
inline size_t estimatePipelineSelectCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
|
||||
const NEO::StreamProperties &cmdListRequired,
|
||||
const NEO::StreamProperties &cmdListFinal,
|
||||
bool &gpgpuEnabled);
|
||||
inline size_t estimatePipelineSelectCmdSize();
|
||||
inline void programOneCmdListPipelineSelect(CommandList *commandList,
|
||||
NEO::LinearStream &commandStream,
|
||||
NEO::StreamProperties &csrState,
|
||||
const NEO::StreamProperties &cmdListRequired,
|
||||
const NEO::StreamProperties &cmdListFinal);
|
||||
|
||||
size_t alignedChildStreamPadding{};
|
||||
};
|
||||
|
@ -146,7 +146,18 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
this->csr->programHardwareContext(child);
|
||||
this->makeSbaTrackingBufferResidentIfL0DebuggerEnabled(ctx.isDebugEnabled);
|
||||
|
||||
this->programPipelineSelectIfGpgpuDisabled(child);
|
||||
auto &csrStateProperties = csr->getStreamProperties();
|
||||
if (!this->pipelineSelectStateTracking) {
|
||||
this->programPipelineSelectIfGpgpuDisabled(child);
|
||||
} else {
|
||||
// Setting systolic/pipeline select here for 1st command list is to preserve dispatch order of hw commands
|
||||
auto commandList = CommandList::fromHandle(phCommandLists[0]);
|
||||
auto &requiredStreamState = commandList->getRequiredStreamState();
|
||||
// Provide cmdlist required state as cmdlist final state, so csr state does not transition to final
|
||||
// By preserving required state in csr - keeping csr state not dirty - it will not dispatch 1st command list pipeline select/systolic in main loop
|
||||
// Csr state will transition to final of 1st command list in main loop
|
||||
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, requiredStreamState);
|
||||
}
|
||||
this->programCommandQueueDebugCmdsForSourceLevelOrL0DebuggerIfEnabled(ctx.isDebugEnabled, child);
|
||||
this->programStateBaseAddressWithGsbaIfDirty(ctx, phCommandLists[0], child);
|
||||
this->programCsrBaseAddressIfPreemptionModeInitial(ctx.isPreemptionModeInitial, child);
|
||||
@ -157,7 +168,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
|
||||
this->programActivePartitionConfig(ctx.isProgramActivePartitionConfigRequired, child);
|
||||
this->encodeKernelArgsBufferAndMakeItResident();
|
||||
auto &csrStateProperties = csr->getStreamProperties();
|
||||
|
||||
bool shouldProgramVfe = this->csr->getLogicalStateHelper() && ctx.frontEndStateDirty;
|
||||
this->programFrontEndAndClearDirtyFlag(shouldProgramVfe, ctx, child, csrStateProperties);
|
||||
|
||||
@ -171,7 +182,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
|
||||
auto &finalStreamState = commandList->getFinalStreamState();
|
||||
|
||||
this->updateOneCmdListPreemptionModeAndCtxStatePreemption(ctx, commandList->getCommandListPreemptionMode(), child);
|
||||
this->updatePipelineSelectState(commandList);
|
||||
this->programOneCmdListPipelineSelect(commandList, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||
this->programOneCmdListFrontEndIfDirty(ctx, child, csrStateProperties, requiredStreamState, finalStreamState);
|
||||
|
||||
this->patchCommands(*commandList, this->csr->getScratchSpaceController()->getScratchPatchAddress());
|
||||
@ -388,11 +399,6 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateFrontEndCmdSizeForMultipleCommandL
|
||||
return estimatedSize;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelect() {
|
||||
return NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(device->getHwInfo());
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programPipelineSelectIfGpgpuDisabled(NEO::LinearStream &cmdStream) {
|
||||
bool gpgpuEnabled = this->csr->getPreambleSetFlag();
|
||||
@ -647,17 +653,14 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
|
||||
uint32_t numCommandLists) {
|
||||
|
||||
size_t linearStreamSizeEstimate = 0u;
|
||||
bool gpgpuEnabled = csr->getPreambleSetFlag();
|
||||
|
||||
if (!gpgpuEnabled) {
|
||||
linearStreamSizeEstimate += estimatePipelineSelect();
|
||||
}
|
||||
|
||||
linearStreamSizeEstimate += estimateFrontEndCmdSize(ctx.frontEndStateDirty);
|
||||
linearStreamSizeEstimate += estimatePipelineSelectCmdSize();
|
||||
|
||||
if (frontEndTrackingEnabled()) {
|
||||
if (this->pipelineSelectStateTracking || frontEndTrackingEnabled()) {
|
||||
bool frontEndStateDirtyCopy = ctx.frontEndStateDirty;
|
||||
auto streamPropertiesCopy = csr->getStreamProperties();
|
||||
bool gpgpuEnabledCopy = csr->getPreambleSetFlag();
|
||||
for (uint32_t i = 0; i < numCommandLists; i++) {
|
||||
auto cmdList = CommandList::fromHandle(phCommandLists[i]);
|
||||
auto &requiredStreamState = cmdList->getRequiredStreamState();
|
||||
@ -665,6 +668,7 @@ size_t CommandQueueHw<gfxCoreFamily>::estimateLinearStreamSizeComplementary(
|
||||
|
||||
linearStreamSizeEstimate += estimateFrontEndCmdSizeForMultipleCommandLists(frontEndStateDirtyCopy, ctx.engineInstanced, cmdList,
|
||||
streamPropertiesCopy, requiredStreamState, finalStreamState);
|
||||
linearStreamSizeEstimate += estimatePipelineSelectCmdSizeForMultipleCommandLists(streamPropertiesCopy, requiredStreamState, finalStreamState, gpgpuEnabledCopy);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1119,14 +1123,59 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::handleSubmissionAndCompletionResults(
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::updatePipelineSelectState(CommandList *commandList) {
|
||||
auto &streamProperties = this->csr->getStreamProperties();
|
||||
// Estimates the space needed for the queue-level pipeline select command.
//
// With pipeline select state tracking enabled this returns 0.
// With tracking disabled it returns the size of one pipeline select command
// when the CSR preamble flag is not yet set, and 0 when it is already set.
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelectCmdSize() {
    if (this->pipelineSelectStateTracking) {
        return 0;
    }

    const bool preambleAlreadySet = csr->getPreambleSetFlag();
    const size_t pipelineSelectSize = NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(device->getHwInfo());
    return preambleAlreadySet ? 0 : pipelineSelectSize;
}
|
||||
|
||||
auto &requiredStreamState = commandList->getRequiredStreamState();
|
||||
auto &finalStreamState = commandList->getFinalStreamState();
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
size_t CommandQueueHw<gfxCoreFamily>::estimatePipelineSelectCmdSizeForMultipleCommandLists(NEO::StreamProperties &csrStateCopy,
|
||||
const NEO::StreamProperties &cmdListRequired,
|
||||
const NEO::StreamProperties &cmdListFinal,
|
||||
bool &gpgpuEnabled) {
|
||||
if (!this->pipelineSelectStateTracking) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
streamProperties.pipelineSelect.setProperties(requiredStreamState.pipelineSelect);
|
||||
streamProperties.pipelineSelect.setProperties(finalStreamState.pipelineSelect);
|
||||
size_t singlePipelineSelectSize = NEO::PreambleHelper<GfxFamily>::getCmdSizeForPipelineSelect(device->getHwInfo());
|
||||
size_t estimatedSize = 0;
|
||||
|
||||
csrStateCopy.pipelineSelect.setProperties(cmdListRequired.pipelineSelect);
|
||||
if (!gpgpuEnabled || csrStateCopy.pipelineSelect.isDirty()) {
|
||||
estimatedSize += singlePipelineSelectSize;
|
||||
gpgpuEnabled = true;
|
||||
}
|
||||
|
||||
csrStateCopy.pipelineSelect.setProperties(cmdListFinal.pipelineSelect);
|
||||
|
||||
return estimatedSize;
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
void CommandQueueHw<gfxCoreFamily>::programOneCmdListPipelineSelect(CommandList *commandList, NEO::LinearStream &commandStream, NEO::StreamProperties &csrState,
|
||||
const NEO::StreamProperties &cmdListRequired, const NEO::StreamProperties &cmdListFinal) {
|
||||
if (!this->pipelineSelectStateTracking) {
|
||||
return;
|
||||
}
|
||||
|
||||
bool preambleSet = csr->getPreambleSetFlag();
|
||||
csrState.pipelineSelect.setProperties(cmdListRequired.pipelineSelect);
|
||||
|
||||
if (!preambleSet || csrState.pipelineSelect.isDirty()) {
|
||||
NEO::PipelineSelectArgs args = {
|
||||
!!csrState.pipelineSelect.systolicMode.value,
|
||||
false,
|
||||
false,
|
||||
commandList->getSystolicModeSupport()};
|
||||
|
||||
NEO::PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, args, device->getHwInfo());
|
||||
csr->setPreambleSetFlag(true);
|
||||
}
|
||||
|
||||
csrState.pipelineSelect.setProperties(cmdListFinal.pipelineSelect);
|
||||
}
|
||||
|
||||
template <GFXCORE_FAMILY gfxCoreFamily>
|
||||
|
@ -23,4 +23,12 @@ bool L0HwHelper::enableMultiReturnPointCommandList() {
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
bool L0HwHelper::enablePipelineSelectStateTracking() {
|
||||
constexpr bool defaultValue = false;
|
||||
if (NEO::DebugManager.flags.EnablePipelineSelectTracking.get() != -1) {
|
||||
return !!NEO::DebugManager.flags.EnablePipelineSelectTracking.get();
|
||||
}
|
||||
return defaultValue;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
@ -31,6 +31,7 @@ class L0HwHelper {
|
||||
public:
|
||||
static L0HwHelper &get(GFXCORE_FAMILY gfxCore);
|
||||
static bool enableMultiReturnPointCommandList();
|
||||
static bool enablePipelineSelectStateTracking();
|
||||
virtual void setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupT &group) const = 0;
|
||||
virtual L0::Event *createEvent(L0::EventPool *eventPool, const ze_event_desc_t *desc, L0::Device *device) const = 0;
|
||||
|
||||
|
@ -11,6 +11,7 @@ set(L0_FIXTURES_SOURCES
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/aub_csr_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/cmdlist_fixture.inl
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.h
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/device_fixture.cpp
|
||||
${CMAKE_CURRENT_SOURCE_DIR}/host_pointer_manager_fixture.h
|
||||
|
@ -118,66 +118,9 @@ void MultiReturnCommandListFixture::setUp() {
|
||||
ModuleMutableCommandListFixture::setUp(REVISION_B);
|
||||
}
|
||||
|
||||
void CmdListPipelineSelectStateFixture::testBody() {
|
||||
const ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
|
||||
auto &cmdListFinalState = commandList->getFinalStreamState();
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
|
||||
auto result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
commandList->close();
|
||||
|
||||
auto &csrState = commandQueue->csr->getStreamProperties();
|
||||
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(0, csrState.pipelineSelect.systolicMode.value);
|
||||
|
||||
commandList->reset();
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
commandList->close();
|
||||
|
||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
|
||||
// Enables pipeline select tracking through the debug key and then runs the
// base fixture set-up.
void CmdListPipelineSelectStateFixture::setUp() {
    // Set the key first: the CommandList and CommandQueueImp constructors read
    // it through L0HwHelper::enablePipelineSelectStateTracking().
    // NOTE(review): presumably those objects are created by the base setUp - confirm.
    DebugManager.flags.EnablePipelineSelectTracking.set(1);

    ModuleMutableCommandListFixture::setUp();
}
|
||||
|
||||
} // namespace ult
|
||||
|
@ -64,89 +64,7 @@ void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
uint64_t firstStoreRegMemAddress,
|
||||
uint32_t secondLoadRegisterRegSrcAddress,
|
||||
uint64_t secondStoreRegMemAddress,
|
||||
bool workloadPartition) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto itor = find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end());
|
||||
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
itor++;
|
||||
startIt = itor;
|
||||
}
|
||||
bool workloadPartition);
|
||||
|
||||
struct ModuleMutableCommandListFixture : public ModuleImmutableDataFixture {
|
||||
void setUp() {
|
||||
@ -168,7 +86,12 @@ struct MultiReturnCommandListFixture : public ModuleMutableCommandListFixture {
|
||||
};
|
||||
|
||||
struct CmdListPipelineSelectStateFixture : public ModuleMutableCommandListFixture {
|
||||
void setUp();
|
||||
|
||||
template <typename FamilyType>
|
||||
void testBody();
|
||||
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
} // namespace ult
|
||||
|
369
level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl
Normal file
369
level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl
Normal file
@ -0,0 +1,369 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
*/
|
||||
|
||||
#include "shared/test/common/helpers/unit_test_helper.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
|
||||
template <typename FamilyType>
|
||||
void validateTimestampRegisters(GenCmdList &cmdList,
|
||||
GenCmdList::iterator &startIt,
|
||||
uint32_t firstLoadRegisterRegSrcAddress,
|
||||
uint64_t firstStoreRegMemAddress,
|
||||
uint32_t secondLoadRegisterRegSrcAddress,
|
||||
uint64_t secondStoreRegMemAddress,
|
||||
bool workloadPartition) {
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_MATH = typename FamilyType::MI_MATH;
|
||||
using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
|
||||
|
||||
constexpr uint32_t mask = 0xfffffffe;
|
||||
|
||||
auto itor = find<MI_LOAD_REGISTER_REG *>(startIt, cmdList.end());
|
||||
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(firstLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(firstStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadReg = genCmdCast<MI_LOAD_REGISTER_REG *>(*itor);
|
||||
EXPECT_EQ(secondLoadRegisterRegSrcAddress, cmdLoadReg->getSourceRegisterAddress());
|
||||
EXPECT_EQ(CS_GPR_R0, cmdLoadReg->getDestinationRegisterAddress());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdLoadImm = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R1, cmdLoadImm->getRegisterOffset());
|
||||
EXPECT_EQ(mask, cmdLoadImm->getDataDword());
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMath = genCmdCast<MI_MATH *>(*itor);
|
||||
EXPECT_EQ(3u, cmdMath->DW0.BitField.DwordLength);
|
||||
}
|
||||
|
||||
itor++;
|
||||
{
|
||||
ASSERT_NE(cmdList.end(), itor);
|
||||
auto cmdMem = genCmdCast<MI_STORE_REGISTER_MEM *>(*itor);
|
||||
EXPECT_EQ(CS_GPR_R2, cmdMem->getRegisterAddress());
|
||||
EXPECT_EQ(secondStoreRegMemAddress, cmdMem->getMemoryAddress());
|
||||
if (workloadPartition) {
|
||||
EXPECT_TRUE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
} else {
|
||||
EXPECT_FALSE(UnitTestHelper<FamilyType>::getWorkloadPartitionForStoreRegisterMemCmd(*cmdMem));
|
||||
}
|
||||
}
|
||||
itor++;
|
||||
startIt = itor;
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
void CmdListPipelineSelectStateFixture::testBody() {
|
||||
using PIPELINE_SELECT = typename FamilyType::PIPELINE_SELECT;
|
||||
|
||||
const ze_group_count_t groupCount{1, 1, 1};
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
|
||||
void *currentBuffer = nullptr;
|
||||
|
||||
auto &cmdlistRequiredState = commandList->getRequiredStreamState();
|
||||
auto &cmdListFinalState = commandList->getFinalStreamState();
|
||||
auto &csrState = commandQueue->csr->getStreamProperties();
|
||||
|
||||
auto commandListHandle = commandList->toHandle();
|
||||
|
||||
auto &commandListStream = *commandList->commandContainer.getCommandStream();
|
||||
auto &cmdQueueStream = commandQueue->commandStream;
|
||||
|
||||
GenCmdList cmdList;
|
||||
std::vector<GenCmdList::iterator> pipelineSelectList;
|
||||
size_t sizeBefore = 0;
|
||||
size_t sizeAfter = 0;
|
||||
auto result = ZE_RESULT_SUCCESS;
|
||||
|
||||
{
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
|
||||
|
||||
sizeBefore = commandListStream.getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = commandListStream.getUsed();
|
||||
|
||||
EXPECT_EQ(0, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, pipelineSelectList.size());
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
commandList->close();
|
||||
|
||||
sizeBefore = cmdQueueStream.getUsed();
|
||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = cmdQueueStream.getUsed();
|
||||
|
||||
EXPECT_EQ(0, csrState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, pipelineSelectList.size());
|
||||
|
||||
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
commandList->reset();
|
||||
}
|
||||
|
||||
{
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
|
||||
sizeBefore = commandListStream.getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = commandListStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, pipelineSelectList.size());
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
|
||||
sizeBefore = commandListStream.getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = commandListStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, pipelineSelectList.size());
|
||||
|
||||
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
commandList->close();
|
||||
|
||||
sizeBefore = cmdQueueStream.getUsed();
|
||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = cmdQueueStream.getUsed();
|
||||
|
||||
EXPECT_EQ(0, csrState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, pipelineSelectList.size());
|
||||
|
||||
pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
|
||||
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
commandList->reset();
|
||||
}
|
||||
|
||||
{
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
|
||||
sizeBefore = commandListStream.getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = commandListStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, pipelineSelectList.size());
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 0;
|
||||
sizeBefore = commandListStream.getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = commandListStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(0, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, pipelineSelectList.size());
|
||||
|
||||
auto pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
|
||||
EXPECT_FALSE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
|
||||
sizeBefore = commandListStream.getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = commandListStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, pipelineSelectList.size());
|
||||
|
||||
pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
|
||||
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
commandList->close();
|
||||
|
||||
sizeBefore = cmdQueueStream.getUsed();
|
||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = cmdQueueStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_EQ(1u, pipelineSelectList.size());
|
||||
|
||||
pipelineSelectCmd = genCmdCast<PIPELINE_SELECT *>(*pipelineSelectList[0]);
|
||||
EXPECT_TRUE(NEO::UnitTestHelper<FamilyType>::getSystolicFlagValueFromPipelineSelectCommand(*pipelineSelectCmd));
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
commandList->reset();
|
||||
}
|
||||
{
|
||||
mockKernelImmData->kernelDescriptor->kernelAttributes.flags.usesSystolicPipelineSelectMode = 1;
|
||||
sizeBefore = commandListStream.getUsed();
|
||||
result = commandList->appendLaunchKernel(kernel->toHandle(), &groupCount, nullptr, 0, nullptr, launchParams);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = commandListStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, cmdlistRequiredState.pipelineSelect.systolicMode.value);
|
||||
EXPECT_EQ(1, cmdListFinalState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(commandListStream.getCpuBase(), sizeBefore);
|
||||
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, pipelineSelectList.size());
|
||||
|
||||
cmdList.clear();
|
||||
pipelineSelectList.clear();
|
||||
commandList->close();
|
||||
|
||||
sizeBefore = cmdQueueStream.getUsed();
|
||||
result = commandQueue->executeCommandLists(1, &commandListHandle, nullptr, false);
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
sizeAfter = cmdQueueStream.getUsed();
|
||||
|
||||
EXPECT_EQ(1, csrState.pipelineSelect.systolicMode.value);
|
||||
|
||||
currentBuffer = ptrOffset(cmdQueueStream.getCpuBase(), sizeBefore);
|
||||
ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList,
|
||||
currentBuffer,
|
||||
(sizeAfter - sizeBefore)));
|
||||
pipelineSelectList = findAll<PIPELINE_SELECT *>(cmdList.begin(), cmdList.end());
|
||||
EXPECT_EQ(0u, pipelineSelectList.size());
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
@ -63,6 +63,7 @@ struct WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>
|
||||
using BaseClass::multiReturnPointCommandList;
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::patternAllocations;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
using BaseClass::requiredStreamState;
|
||||
using BaseClass::unifiedMemoryControls;
|
||||
using BaseClass::updateStreamProperties;
|
||||
@ -123,6 +124,7 @@ struct WhiteBox<L0::CommandListCoreFamilyImmediate<gfxCoreFamily>>
|
||||
using BaseClass::isFlushTaskSubmissionEnabled;
|
||||
using BaseClass::multiReturnPointCommandList;
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
using BaseClass::requiredStreamState;
|
||||
|
||||
WhiteBox() : BaseClass(BaseClass::defaultNumIddsPerBlock) {}
|
||||
@ -147,6 +149,7 @@ struct WhiteBox<::L0::CommandList> : public ::L0::CommandListImp {
|
||||
using BaseClass::multiReturnPointCommandList;
|
||||
using BaseClass::nonImmediateLogicalStateHelper;
|
||||
using BaseClass::partitionCount;
|
||||
using BaseClass::pipelineSelectStateTracking;
|
||||
|
||||
WhiteBox(Device *device);
|
||||
~WhiteBox() override;
|
||||
|
@ -36,6 +36,7 @@ struct WhiteBox<::L0::CommandQueue> : public ::L0::CommandQueueImp {
|
||||
using CommandQueue::internalUsage;
|
||||
using CommandQueue::multiReturnPointCommandList;
|
||||
using CommandQueue::partitionCount;
|
||||
using CommandQueue::pipelineSelectStateTracking;
|
||||
|
||||
WhiteBox(Device *device, NEO::CommandStreamReceiver *csr,
|
||||
const ze_command_queue_desc_t *desc);
|
||||
@ -67,6 +68,7 @@ struct MockCommandQueueHw : public L0::CommandQueueHw<gfxCoreFamily> {
|
||||
using L0::CommandQueue::internalUsage;
|
||||
using L0::CommandQueue::multiReturnPointCommandList;
|
||||
using L0::CommandQueue::partitionCount;
|
||||
using L0::CommandQueue::pipelineSelectStateTracking;
|
||||
using L0::CommandQueue::preemptionCmdSyncProgramming;
|
||||
using L0::CommandQueueImp::csr;
|
||||
using typename BaseClass::CommandListExecutionContext;
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/source/event/event.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
|
||||
namespace L0 {
|
||||
|
@ -8,7 +8,7 @@
|
||||
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
|
@ -11,7 +11,7 @@
|
||||
#include "shared/test/common/test_macros/hw_test.h"
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist_hw_immediate.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include "level_zero/core/source/builtin/builtin_functions_lib_impl.h"
|
||||
#include "level_zero/core/source/kernel/kernel_imp.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_built_ins.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||
#include "level_zero/core/source/fence/fence.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/cmdlist_fixture.inl"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/module_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
@ -538,7 +538,7 @@ using CmdListPipelineSelectStateTest = Test<CmdListPipelineSelectStateFixture>;
|
||||
using SystolicSupport = IsAnyProducts<IGFX_ALDERLAKE_P, IGFX_XE_HP_SDV, IGFX_DG2, IGFX_PVC>;
|
||||
// Test case declared on the CmdListPipelineSelectStateTest fixture; its whole body is a call
// to the fixture's testBody helper. SystolicSupport is passed as the third macro argument and
// presumably limits the test to the products named in that alias - confirm against HWTEST2_F.
// NOTE(review): this span is a rendered diff hunk with the +/- markers stripped. The hunk
// header reports the same line count before and after the change, so `testBody();` looks like
// the removed line and `testBody<FamilyType>();` its replacement - confirm against the repository.
HWTEST2_F(CmdListPipelineSelectStateTest,
|
||||
givenAppendSystolicKernelToCommandListWhenExecutingCommandListThenPipelineSelectStateIsTrackedCorrectly, SystolicSupport) {
|
||||
testBody();
|
||||
testBody<FamilyType>();
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
|
@ -261,6 +261,8 @@ CompletionStamp CommandStreamReceiverHw<GfxFamily>::flushTask(
|
||||
}
|
||||
|
||||
auto newL3Config = PreambleHelper<GfxFamily>::getL3Config(hwInfo, dispatchFlags.useSLM);
|
||||
|
||||
dispatchFlags.pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable;
|
||||
auto isSystolicPipelineSelectModeChanged = (this->lastSystolicPipelineSelectMode != dispatchFlags.pipelineSelectArgs.systolicPipelineSelectMode) && this->systolicModeConfigurable;
|
||||
|
||||
auto requiresCoherency = hwHelper.forceNonGpuCoherencyWA(dispatchFlags.requiresCoherency);
|
||||
|
@ -62,7 +62,6 @@ void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &com
|
||||
if (csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.systolicPipelineSelectMode || !isPreambleSent) {
|
||||
auto &hwInfo = peekHwInfo();
|
||||
if (!isPipelineSelectAlreadyProgrammed()) {
|
||||
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable;
|
||||
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, pipelineSelectArgs, hwInfo);
|
||||
}
|
||||
this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired;
|
||||
|
@ -48,7 +48,6 @@ template <typename GfxFamily>
|
||||
void CommandStreamReceiverHw<GfxFamily>::programPipelineSelect(LinearStream &commandStream, PipelineSelectArgs &pipelineSelectArgs) {
|
||||
if (csrSizeRequestFlags.mediaSamplerConfigChanged || csrSizeRequestFlags.systolicPipelineSelectMode || !isPreambleSent) {
|
||||
auto &hwInfo = peekHwInfo();
|
||||
pipelineSelectArgs.systolicPipelineSelectSupport = this->systolicModeConfigurable;
|
||||
PreambleHelper<GfxFamily>::programPipelineSelect(&commandStream, pipelineSelectArgs, hwInfo);
|
||||
this->lastMediaSamplerConfig = pipelineSelectArgs.mediaSamplerRequired;
|
||||
this->lastSystolicPipelineSelectMode = pipelineSelectArgs.systolicPipelineSelectMode;
|
||||
|
@ -474,6 +474,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, ForceHostPointerImport, -1, "-1: default, 0: dis
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, ProgramExtendedPipeControlPriorToNonPipelinedStateCommand, -1, "-1: default, 0: disable, 1: enable, Program additional extended version of PIPE CONTROL command before non pipelined state command")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, OverrideDrmRegion, -1, "-1: disable, 0+: override to given memory region for all allocations")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, MultiReturnPointCommandList, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag creates multiple return point from List to Queue for Front End reconfiguration on Queue buffer for single List")
|
||||
DECLARE_DEBUG_VARIABLE(int32_t, EnablePipelineSelectTracking, -1, "-1: default: disabled, 0: disabled, 1: enabled. This flag enables optimization that limits number of pipeline select dispatched by command lists")
|
||||
|
||||
/* Binary Cache */
|
||||
DECLARE_DEBUG_VARIABLE(bool, BinaryCacheTrace, false, "enable cl_cache to produce .trace files with information about hash computation")
|
||||
|
@ -308,6 +308,7 @@ ExperimentalSetWalkerPartitionType = -1
|
||||
UseImmDataWriteModeOnPostSyncOperation = 0
|
||||
OverridePostSyncMocs = -1
|
||||
EnableImmediateVmBindExt = -1
|
||||
EnablePipelineSelectTracking = -1
|
||||
ForceExecutionTile = -1
|
||||
DisableCachingForHeaps = 0
|
||||
OverrideTimestampPacketSize = -1
|
||||
|
Reference in New Issue
Block a user