mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-19 16:24:18 +08:00
Add MI_MATH MOCS support
Related-To: NEO-7458
Signed-off-by: Dunajski, Bartosz <bartosz.dunajski@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
c28f0c72ea
commit
c00c310cf4
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (C) 2022 Intel Corporation
|
||||
* Copyright (C) 2022-2023 Intel Corporation
|
||||
*
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
@@ -24,6 +24,12 @@ class EncodeAluHelper {
|
||||
aluOps.miMath.DW0.BitField.DwordLength = AluCount - 1;
|
||||
}
|
||||
|
||||
// Programs the MOCS (Memory Object Control State) value into DW0 of the
// MI_MATH command held by this helper.
//
// The write is compiled in only for families whose
// GfxFamily::isUsingMiMathMocs trait is true; for every other family this
// function is an empty body, which is why `mocs` is marked [[maybe_unused]].
//
// NOTE(review): the MI_MATH definitions touched by this commit declare
// MemoryObjectControlState as BITFIELD_RANGE(8, 14); presumably a wider
// `mocs` value is truncated by the bitfield assignment - confirm that
// callers only pass values that fit.
void setMocs([[maybe_unused]] uint32_t mocs) {
|
||||
if constexpr (GfxFamily::isUsingMiMathMocs) {
|
||||
// Only reached (and only compiled) when the family exposes the MOCS field.
aluOps.miMath.DW0.BitField.MemoryObjectControlState = mocs;
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload for appending an ALU instruction when the caller
// supplies only an opcode: forwards to the three-argument setNextAlu with
// both operand slots set to AluRegisters::OPCODE_NONE.
void setNextAlu(AluRegisters opcode) {
|
||||
setNextAlu(opcode, AluRegisters::OPCODE_NONE, AluRegisters::OPCODE_NONE);
|
||||
}
|
||||
|
||||
@@ -14,6 +14,8 @@
|
||||
#include "shared/source/direct_submission/direct_submission_hw_diagnostic_mode.h"
|
||||
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
|
||||
#include "shared/source/execution_environment/root_device_environment.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/gmm_helper/gmm_lib.h"
|
||||
#include "shared/source/helpers/aligned_memory.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
@@ -102,6 +104,8 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
|
||||
uint64_t loopSectionStartAddress = schedulerStartAddress + RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<GfxFamily>::loopStartSectionStart;
|
||||
|
||||
const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
|
||||
// 1. Init section
|
||||
{
|
||||
EncodeMiPredicate<GfxFamily>::encode(schedulerCmdStream, MiPredicateType::Disable);
|
||||
@@ -133,6 +137,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 10> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_6);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_SHL);
|
||||
@@ -168,6 +173,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R8 + 4, static_cast<uint32_t>(deferredTasksListGpuVa >> 32), true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 14> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_7);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_SHL);
|
||||
@@ -238,6 +244,7 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::dispatchStaticRelaxedOrderingSch
|
||||
LriHelper<GfxFamily>::program(&schedulerCmdStream, CS_GPR_R10 + 4, 0, true);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 4> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_10);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_ADD);
|
||||
@@ -815,7 +822,10 @@ void DirectSubmissionHw<GfxFamily, Dispatcher>::preinitializeRelaxedOrderingSect
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R8, 8, true);
|
||||
LriHelper<GfxFamily>::program(&stream, CS_GPR_R8 + 4, 0, true);
|
||||
|
||||
const uint32_t miMathMocs = this->rootDeviceEnvironment.getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER);
|
||||
|
||||
EncodeAluHelper<GfxFamily, 9> aluHelper;
|
||||
aluHelper.setMocs(miMathMocs);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCB, AluRegisters::R_8);
|
||||
aluHelper.setNextAlu(AluRegisters::OPCODE_SHL);
|
||||
|
||||
@@ -27,6 +27,7 @@ struct Gen11 {
|
||||
static constexpr bool isUsingGenericMediaStateClear = true;
|
||||
static constexpr bool isUsingMiMemFence = false;
|
||||
static constexpr bool isUsingMiSetPredicate = false;
|
||||
static constexpr bool isUsingMiMathMocs = false;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -26,6 +26,7 @@ struct Gen12Lp {
|
||||
static constexpr uint32_t stateComputeModeForceNonCoherentMask = (0b11u << 3);
|
||||
static constexpr bool isUsingMiMemFence = false;
|
||||
static constexpr bool isUsingMiSetPredicate = false;
|
||||
static constexpr bool isUsingMiMathMocs = false;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -28,6 +28,7 @@ struct Gen8 {
|
||||
static constexpr bool isUsingGenericMediaStateClear = true;
|
||||
static constexpr bool isUsingMiMemFence = false;
|
||||
static constexpr bool isUsingMiSetPredicate = false;
|
||||
static constexpr bool isUsingMiMathMocs = false;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -27,6 +27,7 @@ struct Gen9 {
|
||||
static constexpr bool isUsingGenericMediaStateClear = true;
|
||||
static constexpr bool isUsingMiMemFence = false;
|
||||
static constexpr bool isUsingMiSetPredicate = false;
|
||||
static constexpr bool isUsingMiMathMocs = false;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -652,7 +652,8 @@ typedef struct tagMI_MATH {
|
||||
union _DW0 {
|
||||
struct _BitField {
|
||||
uint32_t DwordLength : BITFIELD_RANGE(0, 7);
|
||||
uint32_t Reserved : BITFIELD_RANGE(8, 22);
|
||||
uint32_t MemoryObjectControlState : BITFIELD_RANGE(8, 14);
|
||||
uint32_t Reserved : BITFIELD_RANGE(15, 22);
|
||||
uint32_t InstructionOpcode : BITFIELD_RANGE(23, 28);
|
||||
uint32_t InstructionType : BITFIELD_RANGE(29, 31);
|
||||
} BitField;
|
||||
|
||||
@@ -401,7 +401,8 @@ typedef struct tagMI_MATH {
|
||||
union _DW0 {
|
||||
struct _BitField {
|
||||
uint32_t DwordLength : BITFIELD_RANGE(0, 7);
|
||||
uint32_t Reserved : BITFIELD_RANGE(8, 22);
|
||||
uint32_t MemoryObjectControlState : BITFIELD_RANGE(8, 14);
|
||||
uint32_t Reserved : BITFIELD_RANGE(15, 22);
|
||||
uint32_t InstructionOpcode : BITFIELD_RANGE(23, 28);
|
||||
uint32_t InstructionType : BITFIELD_RANGE(29, 31);
|
||||
} BitField;
|
||||
|
||||
@@ -32,6 +32,7 @@ struct XeHpCore {
|
||||
static constexpr bool isUsingGenericMediaStateClear = true;
|
||||
static constexpr bool isUsingMiMemFence = false;
|
||||
static constexpr bool isUsingMiSetPredicate = true;
|
||||
static constexpr bool isUsingMiMathMocs = false;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -34,6 +34,7 @@ struct XeHpcCore {
|
||||
static constexpr bool isUsingGenericMediaStateClear = true;
|
||||
static constexpr bool isUsingMiMemFence = true;
|
||||
static constexpr bool isUsingMiSetPredicate = true;
|
||||
static constexpr bool isUsingMiMathMocs = true;
|
||||
|
||||
struct StateBaseAddressStateSupport {
|
||||
static constexpr bool globalAtomics = false;
|
||||
|
||||
@@ -34,6 +34,7 @@ struct XeHpgCore {
|
||||
static constexpr bool isUsingGenericMediaStateClear = true;
|
||||
static constexpr bool isUsingMiMemFence = false;
|
||||
static constexpr bool isUsingMiSetPredicate = true;
|
||||
static constexpr bool isUsingMiMathMocs = true;
|
||||
|
||||
struct FrontEndStateSupport {
|
||||
static constexpr bool scratchSize = true;
|
||||
|
||||
@@ -12,6 +12,8 @@
|
||||
#include "shared/source/direct_submission/direct_submission_hw.h"
|
||||
#include "shared/source/direct_submission/dispatchers/render_dispatcher.h"
|
||||
#include "shared/source/direct_submission/relaxed_ordering_helper.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/gmm_helper/gmm_lib.h"
|
||||
#include "shared/source/helpers/flush_stamp.h"
|
||||
#include "shared/source/helpers/gfx_core_helper.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
@@ -1054,7 +1056,7 @@ struct DirectSubmissionRelaxedOrderingTests : public DirectSubmissionDispatchBuf
|
||||
bool verifyDynamicSchedulerProgramming(LinearStream &cs, uint64_t schedulerAllocationGpuVa, uint64_t semaphoreGpuVa, uint32_t semaphoreValue, size_t offset, size_t &endOffset);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit);
|
||||
bool verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs);
|
||||
|
||||
template <typename FamilyType>
|
||||
bool verifyMiPredicate(void *miPredicateCmd, MiPredicateType predicateType);
|
||||
@@ -1329,7 +1331,7 @@ bool DirectSubmissionRelaxedOrderingTests::verifyConditionalDataRegBbStart(void
|
||||
}
|
||||
|
||||
template <typename FamilyType>
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit) {
|
||||
bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(GraphicsAllocation &schedulerAllocation, uint64_t deferredTaskListVa, uint32_t expectedQueueSizeLimit, uint32_t miMathMocs) {
|
||||
using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
|
||||
using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;
|
||||
using MI_LOAD_REGISTER_REG = typename FamilyType::MI_LOAD_REGISTER_REG;
|
||||
@@ -1412,6 +1414,12 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
return false;
|
||||
}
|
||||
|
||||
if constexpr (FamilyType::isUsingMiMathMocs) {
|
||||
if (miMathCmd->DW0.BitField.MemoryObjectControlState != miMathMocs) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_2)) {
|
||||
return false;
|
||||
@@ -1504,6 +1512,12 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
return false;
|
||||
}
|
||||
|
||||
if constexpr (FamilyType::isUsingMiMathMocs) {
|
||||
if (miMathCmd->DW0.BitField.MemoryObjectControlState != miMathMocs) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1)) {
|
||||
return false;
|
||||
@@ -1646,6 +1660,12 @@ bool DirectSubmissionRelaxedOrderingTests::verifyStaticSchedulerProgramming(Grap
|
||||
return false;
|
||||
}
|
||||
|
||||
if constexpr (FamilyType::isUsingMiMathMocs) {
|
||||
if (miMathCmd->DW0.BitField.MemoryObjectControlState != miMathMocs) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
if (!verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_9)) {
|
||||
return false;
|
||||
@@ -1793,7 +1813,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenDebugFlagSetWhenDispatching
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
EXPECT_TRUE(verifyStaticSchedulerProgramming<FamilyType>(*directSubmission.relaxedOrderingSchedulerAllocation,
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress(), 123));
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress(), 123,
|
||||
pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)));
|
||||
}
|
||||
|
||||
HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenNewNumberOfClientsWhenDispatchingWorkThenIncraseQueueSize, IsAtLeastXeHpcCore) {
|
||||
@@ -1806,7 +1827,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, givenNewNumberOfClientsWhenDispa
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
EXPECT_EQ(RelaxedOrderingHelper::queueSizeMultiplier, directSubmission.currentRelaxedOrderingQueueSize);
|
||||
EXPECT_TRUE(verifyStaticSchedulerProgramming<FamilyType>(*directSubmission.relaxedOrderingSchedulerAllocation,
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier));
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier,
|
||||
pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)));
|
||||
|
||||
const uint64_t expectedQueueSizeValueVa = directSubmission.relaxedOrderingSchedulerAllocation->getGpuAddress() +
|
||||
RelaxedOrderingHelper::StaticSchedulerSizeAndOffsetSection<FamilyType>::drainRequestSectionStart +
|
||||
@@ -1876,7 +1898,8 @@ HWTEST2_F(DirectSubmissionRelaxedOrderingTests, whenInitializingThenDispatchStat
|
||||
|
||||
EXPECT_EQ(1u, directSubmission.dispatchStaticRelaxedOrderingSchedulerCalled);
|
||||
EXPECT_TRUE(verifyStaticSchedulerProgramming<FamilyType>(*directSubmission.relaxedOrderingSchedulerAllocation,
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier));
|
||||
directSubmission.deferredTasksListAllocation->getGpuAddress(), RelaxedOrderingHelper::queueSizeMultiplier,
|
||||
pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER)));
|
||||
}
|
||||
|
||||
{
|
||||
@@ -2031,6 +2054,10 @@ HWTEST_F(DirectSubmissionRelaxedOrderingTests, whenDispatchingWorkThenDispatchTa
|
||||
auto miMathCmd = reinterpret_cast<MI_MATH *>(++lriCmd);
|
||||
EXPECT_EQ(8u, miMathCmd->DW0.BitField.DwordLength);
|
||||
|
||||
if constexpr (FamilyType::isUsingMiMathMocs) {
|
||||
EXPECT_EQ(miMathCmd->DW0.BitField.MemoryObjectControlState, pDevice->getRootDeviceEnvironment().getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER));
|
||||
}
|
||||
|
||||
auto miAluCmd = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(++miMathCmd);
|
||||
EXPECT_TRUE(verifyAlu<FamilyType>(miAluCmd, AluRegisters::OPCODE_LOAD, AluRegisters::R_SRCA, AluRegisters::R_1));
|
||||
|
||||
|
||||
Reference in New Issue
Block a user