feature: update GRF register implementation

Related-To: NEO-8314
Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
This commit is contained in:
Jitendra Sharma
2024-10-15 11:39:53 +00:00
committed by Compute-Runtime-Automation
parent a149336ea1
commit 9bd4878841
15 changed files with 71 additions and 75 deletions

View File

@@ -102,6 +102,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
virtual ze_rtas_format_exp_t getSupportedRTASFormat() const = 0;
virtual bool platformSupportsImmediateComputeFlushTask() const = 0;
virtual zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const = 0;
// Returns the number of GRF registers to report for a thread, derived from the
// register data pointed to by regPtr. Which dword/bits are examined (if any) is
// decided by the per-family implementation.
// NOTE(review): regPtr appears to be data read from the regset reported by
// getRegsetTypeForLargeGrfDetection(); whether nullptr is acceptable depends on
// the implementation - confirm before passing it.
virtual uint32_t getGrfRegisterCount(uint32_t *regPtr) const = 0;
virtual uint32_t getCmdListWaitOnMemoryDataSize() const = 0;
virtual bool hasUnifiedPostSyncAllocationLayout() const = 0;
virtual uint32_t getImmediateWritePostSyncOffset() const = 0;
@@ -153,6 +154,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
ze_rtas_format_exp_t getSupportedRTASFormat() const override;
bool platformSupportsImmediateComputeFlushTask() const override;
zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const override;
uint32_t getGrfRegisterCount(uint32_t *regPtr) const override;
uint32_t getCmdListWaitOnMemoryDataSize() const override;
bool hasUnifiedPostSyncAllocationLayout() const override;
uint32_t getImmediateWritePostSyncOffset() const override;

View File

@@ -79,4 +79,9 @@ zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLar
return ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU;
}
// GRF register count query for families handled by this file: regPtr is never
// read here and the count is fixed at 128.
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
    constexpr uint32_t defaultGrfRegisterCount = 128u;
    return defaultGrfRegisterCount;
}
} // namespace L0

View File

@@ -14,4 +14,14 @@ zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLar
return ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU;
}
// Derives the GRF register count from the register data in regPtr:
// 256 when bit 0x2000 of the first dword is set, otherwise 128.
// regPtr must point to at least one readable dword.
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
    constexpr uint32_t largeGrfModeMask = 0x2000u;
    constexpr uint32_t defaultGrfCount = 128u;
    constexpr uint32_t largeGrfCount = 256u;

    const bool largeGrfActive = (regPtr[0] & largeGrfModeMask) != 0u;
    return largeGrfActive ? largeGrfCount : defaultGrfCount;
}
} // namespace L0

View File

@@ -23,6 +23,16 @@ zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLar
return ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU;
}
// Derives the GRF register count from the register data in regPtr:
// 256 only when both bits of mask 0x6000 are set in the second dword,
// otherwise 128. regPtr must point to at least two readable dwords.
template <typename Family>
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
    constexpr uint32_t largeGrfModeMask = 0x6000u;
    constexpr uint32_t defaultGrfCount = 128u;
    constexpr uint32_t largeGrfCount = 256u;

    // Both mask bits have to be set; a single bit does not select large GRF.
    const bool largeGrfActive = (regPtr[1] & largeGrfModeMask) == largeGrfModeMask;
    return largeGrfActive ? largeGrfCount : defaultGrfCount;
}
/*
* Xe2 stall sample data item format:
*

View File

@@ -28,6 +28,11 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetRegsetTypeForLargeG
EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU, l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection());
}
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetGrfRegisterCountIsCalledThen128IsRetuned) {
    // The helper is expected to report 128 registers even when no register
    // data is supplied.
    const auto &helper = getHelper<L0GfxCoreHelper>();
    const auto grfRegisterCount = helper.getGrfRegisterCount(nullptr);
    EXPECT_EQ(128u, grfRegisterCount);
}
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();

View File

@@ -1335,31 +1335,15 @@ void DebugSession::updateGrfRegisterSetProperties(EuThread::ThreadId thread, uin
return;
}
auto &gfxCoreHelper = this->connectedDevice->getGfxCoreHelper();
if (!gfxCoreHelper.largeGrfModeSupported()) {
return;
}
// update GRF, if large GRF is enabled
auto &l0GfxCoreHelper = connectedDevice->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
auto regsetType = l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection();
bool largeGrfModeEnabled = false;
const auto regSize = std::max(getRegisterSize(regsetType), 64u);
auto reg = std::make_unique<uint32_t[]>(regSize / sizeof(uint32_t));
memset(reg.get(), 0, regSize);
readRegistersImp(thread, regsetType, 0, 1, reg.get());
auto regPtr = reg.get();
if (regsetType == ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU) {
largeGrfModeEnabled = regPtr[0] & 0x2000;
} else if (regsetType == ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU) {
largeGrfModeEnabled = ((regPtr[1] & 0x6000) == 0x6000);
}
if (largeGrfModeEnabled) {
for (uint32_t i = 0; i < *pCount; i++) {
if (pRegisterSetProperties[i].type == ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU) {
pRegisterSetProperties[i].count = 256;
}
for (uint32_t i = 0; i < *pCount; i++) {
if (pRegisterSetProperties[i].type == ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU) {
pRegisterSetProperties[i].count = l0GfxCoreHelper.getGrfRegisterCount(reg.get());
}
}
}

View File

@@ -2703,37 +2703,6 @@ HWTEST2_F(DebugSessionRegistersAccessTest,
EXPECT_EQ(128u, threadRegsetProps[0].count);
}
TEST_F(DebugSessionRegistersAccessTest, givenGetThreadRegisterSetPropertiesCalledPropertieAreTheSameAsgetRegisterSetProperties) {
    // Install mock builtins whose state save area header is created with version 2.
    auto builtins = new MockBuiltins();
    builtins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2);
    MockRootDeviceEnvironment::resetBuiltins(neoDevice->executionEnvironment->rootDeviceEnvironments[0].get(), builtins);

    ze_device_thread_t queriedThread = stoppedThread;

    // Size queries: the device-level and thread-level APIs must agree on the number of register sets.
    uint32_t deviceRegsetCount = 0;
    uint32_t threadRegsetCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &deviceRegsetCount, nullptr));
    EXPECT_EQ(13u, deviceRegsetCount);
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &threadRegsetCount, nullptr));
    ASSERT_EQ(threadRegsetCount, deviceRegsetCount);

    // Fetch the full property arrays from both APIs.
    std::vector<zet_debug_regset_properties_t> deviceProps(deviceRegsetCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &deviceRegsetCount, deviceProps.data()));

    std::vector<zet_debug_regset_properties_t> threadProps(deviceRegsetCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &deviceRegsetCount, threadProps.data()));

    // Every field of every entry has to match between the two views.
    for (size_t idx = 0; idx < deviceRegsetCount; ++idx) {
        const auto &fromDevice = deviceProps[idx];
        const auto &fromThread = threadProps[idx];

        EXPECT_EQ(fromDevice.stype, fromThread.stype);
        EXPECT_EQ(fromDevice.pNext, fromThread.pNext);
        EXPECT_EQ(fromDevice.version, fromThread.version);
        EXPECT_EQ(fromDevice.generalFlags, fromThread.generalFlags);
        EXPECT_EQ(fromDevice.deviceFlags, fromThread.deviceFlags);
        EXPECT_EQ(fromDevice.count, fromThread.count);
        EXPECT_EQ(fromDevice.bitSize, fromThread.bitSize);
        EXPECT_EQ(fromDevice.byteSize, fromThread.byteSize);
    }
}
TEST_F(DebugSessionRegistersAccessTest, givenUnsupportedRegisterTypeWhenReadRegistersCalledThenErrorInvalidArgumentIsReturned) {
session->areRequestedThreadsStoppedReturnValue = 1;
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zetDebugReadRegisters(session->toHandle(), stoppedThread, 0x12345, 0, 1, nullptr));

View File

@@ -374,7 +374,7 @@ TEST_F(DebugApiTest, givenGetRegisterSetPropertiesCalledWithV3HeaderCorrectPrope
validateRegsetProps(regsetProps[1], ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 256, 32);
validateRegsetProps(regsetProps[2], ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 32, 4);
validateRegsetProps(regsetProps[3], ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 32, 4);
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 128, 16);
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 160, 20);
validateRegsetProps(regsetProps[5], ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 128, 16);
validateRegsetProps(regsetProps[6], ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 128, 16);
validateRegsetProps(regsetProps[7], ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 10, 256, 32);
@@ -418,7 +418,7 @@ TEST_F(DebugApiTest, givenGetRegisterSetPropertiesCalledWhenV3HeaderHeaplessThen
validateRegsetProps(regsetProps[1], ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 256, 32);
validateRegsetProps(regsetProps[2], ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 32, 4);
validateRegsetProps(regsetProps[3], ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 32, 4);
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 128, 16);
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 160, 20);
validateRegsetProps(regsetProps[5], ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 128, 16);
validateRegsetProps(regsetProps[6], ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 128, 16);
validateRegsetProps(regsetProps[7], ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 10, 256, 32);

View File

@@ -90,5 +90,38 @@ HWTEST2_F(DebugApiTest, givenDeviceWhenDebugAttachIsAvaialbleThenGetPropertiesRe
EXPECT_EQ(ZET_DEVICE_DEBUG_PROPERTY_FLAG_ATTACH, debugProperties.flags);
}
using DebugSessionRegistersAccessTestProductSpecfic = Test<DebugSessionRegistersAccess>;
HWTEST2_F(DebugSessionRegistersAccessTestProductSpecfic, GivenGetThreadRegisterSetPropertiesCalledPropertieAreTheSameAsGetRegisterSetProperties,
          IsAtMostXe2HpgCore) {
    // Install mock builtins whose state save area header is created with version 2.
    auto builtins = new MockBuiltins();
    builtins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2);
    MockRootDeviceEnvironment::resetBuiltins(neoDevice->executionEnvironment->rootDeviceEnvironments[0].get(), builtins);

    ze_device_thread_t queriedThread = stoppedThread;

    // Size queries: the device-level and thread-level APIs must agree on the number of register sets.
    uint32_t deviceRegsetCount = 0;
    uint32_t threadRegsetCount = 0;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &deviceRegsetCount, nullptr));
    EXPECT_EQ(13u, deviceRegsetCount);
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &threadRegsetCount, nullptr));
    ASSERT_EQ(threadRegsetCount, deviceRegsetCount);

    // Fetch the full property arrays from both APIs.
    std::vector<zet_debug_regset_properties_t> deviceProps(deviceRegsetCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &deviceRegsetCount, deviceProps.data()));

    std::vector<zet_debug_regset_properties_t> threadProps(deviceRegsetCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &deviceRegsetCount, threadProps.data()));

    // Every field of every entry has to match between the two views.
    for (size_t idx = 0; idx < deviceRegsetCount; ++idx) {
        const auto &fromDevice = deviceProps[idx];
        const auto &fromThread = threadProps[idx];

        EXPECT_EQ(fromDevice.stype, fromThread.stype);
        EXPECT_EQ(fromDevice.pNext, fromThread.pNext);
        EXPECT_EQ(fromDevice.version, fromThread.version);
        EXPECT_EQ(fromDevice.generalFlags, fromThread.generalFlags);
        EXPECT_EQ(fromDevice.deviceFlags, fromThread.deviceFlags);
        EXPECT_EQ(fromDevice.count, fromThread.count);
        EXPECT_EQ(fromDevice.bitSize, fromThread.bitSize);
        EXPECT_EQ(fromDevice.byteSize, fromThread.byteSize);
    }
}
} // namespace ult
} // namespace L0

View File

@@ -103,7 +103,6 @@ class GfxCoreHelper {
virtual uint8_t getBarriersCountFromHasBarriers(uint8_t hasBarriers) const = 0;
virtual uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const = 0;
virtual uint32_t calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const = 0;
virtual bool largeGrfModeSupported() const = 0;
virtual uint32_t alignSlmSize(uint32_t slmSize) const = 0;
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const = 0;
@@ -308,7 +307,6 @@ class GfxCoreHelperHw : public GfxCoreHelper {
uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const override;
uint32_t calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const override;
bool largeGrfModeSupported() const override;
uint32_t alignSlmSize(uint32_t slmSize) const override;

View File

@@ -647,11 +647,6 @@ bool GfxCoreHelperHw<GfxFamily>::isChipsetUniqueUUIDSupported() const {
return false;
}
// Generic implementation: reports large GRF mode as not supported.
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::largeGrfModeSupported() const {
return false;
}
template <typename GfxFamily>
bool GfxCoreHelperHw<GfxFamily>::isTimestampShiftRequired() const {
return true;

View File

@@ -210,11 +210,6 @@ bool GfxCoreHelperHw<Family>::isChipsetUniqueUUIDSupported() const {
return true;
}
// Explicit specialization for `Family`: reports large GRF mode as supported.
template <>
bool GfxCoreHelperHw<Family>::largeGrfModeSupported() const {
return true;
}
template <typename GfxFamily>
uint32_t GfxCoreHelperHw<GfxFamily>::getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const {
const auto &kernelAttributes = kernelDescriptor.kernelAttributes;

View File

@@ -162,7 +162,7 @@ std::vector<char> createStateSaveAreaHeader(uint32_t version, uint16_t grfNum, u
{4096, 1, 256, 32}, // addr
{4128, 2, 32, 4}, // flag
{4156, 1, 32, 4}, // emask
{4160, 2, 128, 16}, // sr
{4160, 2, 160, 20}, // sr
{4192, 1, 128, 16}, // cr
{4256, 1, 96, 12}, // notification
{4288, 1, 128, 16}, // tdr

View File

@@ -19,11 +19,6 @@
using GfxCoreHelperTestDg1 = GfxCoreHelperTest;
DG1TEST_F(GfxCoreHelperTestDg1, givenDg1SteppingWhenLargeGrfModeSupportedIsQueriedThenFalseIsReturned) {
    // The helper under test is expected to report large GRF mode as unsupported.
    const bool largeGrfSupported = getHelper<GfxCoreHelper>().largeGrfModeSupported();
    EXPECT_FALSE(largeGrfSupported);
}
DG1TEST_F(GfxCoreHelperTestDg1, givenDg1SteppingA0WhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) {
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
const auto &productHelper = getHelper<ProductHelper>();

View File

@@ -732,11 +732,6 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenParamsWhenCalculateNumThre
}
}
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenXe2HpgWhenLargeGrfModeSupportedIsQueriedThenTrueIsReturned) {
    // The helper under test is expected to report large GRF mode as supported.
    const bool largeGrfSupported = getHelper<GfxCoreHelper>().largeGrfModeSupported();
    EXPECT_TRUE(largeGrfSupported);
}
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllocationsToFillThenReturnCorrectValue) {
DebugManagerStateRestore restorer;
MockExecutionEnvironment mockExecutionEnvironment{};