mirror of
https://github.com/intel/compute-runtime.git
synced 2025-12-18 22:08:53 +08:00
feature: update GRF register implementation
Related-To: NEO-8314
Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
a149336ea1
commit
9bd4878841
@@ -102,6 +102,7 @@ class L0GfxCoreHelper : public NEO::ApiGfxCoreHelper {
|
||||
virtual ze_rtas_format_exp_t getSupportedRTASFormat() const = 0;
|
||||
virtual bool platformSupportsImmediateComputeFlushTask() const = 0;
|
||||
virtual zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const = 0;
|
||||
virtual uint32_t getGrfRegisterCount(uint32_t *regPtr) const = 0;
|
||||
virtual uint32_t getCmdListWaitOnMemoryDataSize() const = 0;
|
||||
virtual bool hasUnifiedPostSyncAllocationLayout() const = 0;
|
||||
virtual uint32_t getImmediateWritePostSyncOffset() const = 0;
|
||||
@@ -153,6 +154,7 @@ class L0GfxCoreHelperHw : public L0GfxCoreHelper {
|
||||
ze_rtas_format_exp_t getSupportedRTASFormat() const override;
|
||||
bool platformSupportsImmediateComputeFlushTask() const override;
|
||||
zet_debug_regset_type_intel_gpu_t getRegsetTypeForLargeGrfDetection() const override;
|
||||
uint32_t getGrfRegisterCount(uint32_t *regPtr) const override;
|
||||
uint32_t getCmdListWaitOnMemoryDataSize() const override;
|
||||
bool hasUnifiedPostSyncAllocationLayout() const override;
|
||||
uint32_t getImmediateWritePostSyncOffset() const override;
|
||||
|
||||
@@ -79,4 +79,9 @@ zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLar
|
||||
return ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
|
||||
return 128;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -14,4 +14,14 @@ zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLar
|
||||
return ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
|
||||
bool largeGrfModeEnabled = false;
|
||||
largeGrfModeEnabled = regPtr[0] & 0x2000;
|
||||
if (largeGrfModeEnabled) {
|
||||
return 256;
|
||||
}
|
||||
return 128;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -23,6 +23,16 @@ zet_debug_regset_type_intel_gpu_t L0GfxCoreHelperHw<Family>::getRegsetTypeForLar
|
||||
return ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU;
|
||||
}
|
||||
|
||||
template <typename Family>
|
||||
uint32_t L0GfxCoreHelperHw<Family>::getGrfRegisterCount(uint32_t *regPtr) const {
|
||||
bool largeGrfModeEnabled = false;
|
||||
largeGrfModeEnabled = ((regPtr[1] & 0x6000) == 0x6000);
|
||||
if (largeGrfModeEnabled) {
|
||||
return 256;
|
||||
}
|
||||
return 128;
|
||||
}
|
||||
|
||||
/*
|
||||
* Xe2 stall sample data item format:
|
||||
*
|
||||
|
||||
@@ -28,6 +28,11 @@ GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetRegsetTypeForLargeG
|
||||
EXPECT_EQ(ZET_DEBUG_REGSET_TYPE_INVALID_INTEL_GPU, l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection());
|
||||
}
|
||||
|
||||
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenGetGrfRegisterCountIsCalledThen128IsRetuned) {
    // A null register buffer is passed on purpose: the expected count of 128
    // must be produced without reading any register data.
    auto &helper = getHelper<L0GfxCoreHelper>();
    const auto grfCount = helper.getGrfRegisterCount(nullptr);
    EXPECT_EQ(128u, grfCount);
}
|
||||
|
||||
GEN12LPTEST_F(L0GfxCoreHelperTestGen12Lp, GivenGen12LpWhenCheckingL0HelperForCmdListHeapSharingSupportThenReturnTrue) {
|
||||
auto &l0GfxCoreHelper = getHelper<L0GfxCoreHelper>();
|
||||
|
||||
|
||||
@@ -1335,31 +1335,15 @@ void DebugSession::updateGrfRegisterSetProperties(EuThread::ThreadId thread, uin
|
||||
return;
|
||||
}
|
||||
|
||||
auto &gfxCoreHelper = this->connectedDevice->getGfxCoreHelper();
|
||||
if (!gfxCoreHelper.largeGrfModeSupported()) {
|
||||
return;
|
||||
}
|
||||
|
||||
// update GRF, if large GRF is enabled
|
||||
auto &l0GfxCoreHelper = connectedDevice->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
|
||||
auto regsetType = l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection();
|
||||
bool largeGrfModeEnabled = false;
|
||||
const auto regSize = std::max(getRegisterSize(regsetType), 64u);
|
||||
auto reg = std::make_unique<uint32_t[]>(regSize / sizeof(uint32_t));
|
||||
memset(reg.get(), 0, regSize);
|
||||
readRegistersImp(thread, regsetType, 0, 1, reg.get());
|
||||
auto regPtr = reg.get();
|
||||
if (regsetType == ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU) {
|
||||
largeGrfModeEnabled = regPtr[0] & 0x2000;
|
||||
} else if (regsetType == ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU) {
|
||||
largeGrfModeEnabled = ((regPtr[1] & 0x6000) == 0x6000);
|
||||
}
|
||||
|
||||
if (largeGrfModeEnabled) {
|
||||
for (uint32_t i = 0; i < *pCount; i++) {
|
||||
if (pRegisterSetProperties[i].type == ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU) {
|
||||
pRegisterSetProperties[i].count = 256;
|
||||
}
|
||||
for (uint32_t i = 0; i < *pCount; i++) {
|
||||
if (pRegisterSetProperties[i].type == ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU) {
|
||||
pRegisterSetProperties[i].count = l0GfxCoreHelper.getGrfRegisterCount(reg.get());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2703,37 +2703,6 @@ HWTEST2_F(DebugSessionRegistersAccessTest,
|
||||
EXPECT_EQ(128u, threadRegsetProps[0].count);
|
||||
}
|
||||
|
||||
TEST_F(DebugSessionRegistersAccessTest, givenGetThreadRegisterSetPropertiesCalledPropertieAreTheSameAsgetRegisterSetProperties) {
    // Install mock builtins carrying a version-2 state save area header.
    auto mockBuiltins = new MockBuiltins();
    mockBuiltins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2);
    MockRootDeviceEnvironment::resetBuiltins(neoDevice->executionEnvironment->rootDeviceEnvironments[0].get(), mockBuiltins);

    // First pass: query only the number of register sets from both entry points.
    uint32_t count = 0;
    uint32_t threadCount = 0;
    ze_device_thread_t queriedThread = stoppedThread;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &count, nullptr));
    EXPECT_EQ(13u, count);
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &threadCount, nullptr));
    ASSERT_EQ(threadCount, count);

    // Second pass: fetch the actual property arrays.
    std::vector<zet_debug_regset_properties_t> deviceProps(count);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &count, deviceProps.data()));
    std::vector<zet_debug_regset_properties_t> perThreadProps(count);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &count, perThreadProps.data()));

    // Every field of each per-thread entry must match the device-level entry.
    for (size_t idx = 0; idx < count; ++idx) {
        const auto &expected = deviceProps[idx];
        const auto &actual = perThreadProps[idx];
        EXPECT_EQ(expected.stype, actual.stype);
        EXPECT_EQ(expected.pNext, actual.pNext);
        EXPECT_EQ(expected.version, actual.version);
        EXPECT_EQ(expected.generalFlags, actual.generalFlags);
        EXPECT_EQ(expected.deviceFlags, actual.deviceFlags);
        EXPECT_EQ(expected.count, actual.count);
        EXPECT_EQ(expected.bitSize, actual.bitSize);
        EXPECT_EQ(expected.byteSize, actual.byteSize);
    }
}
|
||||
|
||||
TEST_F(DebugSessionRegistersAccessTest, givenUnsupportedRegisterTypeWhenReadRegistersCalledThenErrorInvalidArgumentIsReturned) {
|
||||
session->areRequestedThreadsStoppedReturnValue = 1;
|
||||
EXPECT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, zetDebugReadRegisters(session->toHandle(), stoppedThread, 0x12345, 0, 1, nullptr));
|
||||
|
||||
@@ -374,7 +374,7 @@ TEST_F(DebugApiTest, givenGetRegisterSetPropertiesCalledWithV3HeaderCorrectPrope
|
||||
validateRegsetProps(regsetProps[1], ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 256, 32);
|
||||
validateRegsetProps(regsetProps[2], ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 32, 4);
|
||||
validateRegsetProps(regsetProps[3], ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 32, 4);
|
||||
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 128, 16);
|
||||
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 160, 20);
|
||||
validateRegsetProps(regsetProps[5], ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 128, 16);
|
||||
validateRegsetProps(regsetProps[6], ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 128, 16);
|
||||
validateRegsetProps(regsetProps[7], ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 10, 256, 32);
|
||||
@@ -418,7 +418,7 @@ TEST_F(DebugApiTest, givenGetRegisterSetPropertiesCalledWhenV3HeaderHeaplessThen
|
||||
validateRegsetProps(regsetProps[1], ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 256, 32);
|
||||
validateRegsetProps(regsetProps[2], ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 32, 4);
|
||||
validateRegsetProps(regsetProps[3], ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 32, 4);
|
||||
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 128, 16);
|
||||
validateRegsetProps(regsetProps[4], ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 2, 160, 20);
|
||||
validateRegsetProps(regsetProps[5], ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 1, 128, 16);
|
||||
validateRegsetProps(regsetProps[6], ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE, 1, 128, 16);
|
||||
validateRegsetProps(regsetProps[7], ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU, ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE, 10, 256, 32);
|
||||
|
||||
@@ -90,5 +90,38 @@ HWTEST2_F(DebugApiTest, givenDeviceWhenDebugAttachIsAvaialbleThenGetPropertiesRe
|
||||
EXPECT_EQ(ZET_DEVICE_DEBUG_PROPERTY_FLAG_ATTACH, debugProperties.flags);
|
||||
}
|
||||
|
||||
using DebugSessionRegistersAccessTestProductSpecfic = Test<DebugSessionRegistersAccess>;
|
||||
HWTEST2_F(DebugSessionRegistersAccessTestProductSpecfic, GivenGetThreadRegisterSetPropertiesCalledPropertieAreTheSameAsGetRegisterSetProperties,
          IsAtMostXe2HpgCore) {
    // Install mock builtins carrying a version-2 state save area header.
    auto mockBuiltins = new MockBuiltins();
    mockBuiltins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2);
    MockRootDeviceEnvironment::resetBuiltins(neoDevice->executionEnvironment->rootDeviceEnvironments[0].get(), mockBuiltins);

    // First pass: query only the number of register sets from both entry points.
    uint32_t count = 0;
    uint32_t threadCount = 0;
    ze_device_thread_t queriedThread = stoppedThread;
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &count, nullptr));
    EXPECT_EQ(13u, count);
    EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &threadCount, nullptr));
    ASSERT_EQ(threadCount, count);

    // Second pass: fetch the actual property arrays.
    std::vector<zet_debug_regset_properties_t> deviceProps(count);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(session->getConnectedDevice(), &count, deviceProps.data()));
    std::vector<zet_debug_regset_properties_t> perThreadProps(count);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &count, perThreadProps.data()));

    // Every field of each per-thread entry must match the device-level entry.
    for (size_t idx = 0; idx < count; ++idx) {
        const auto &expected = deviceProps[idx];
        const auto &actual = perThreadProps[idx];
        EXPECT_EQ(expected.stype, actual.stype);
        EXPECT_EQ(expected.pNext, actual.pNext);
        EXPECT_EQ(expected.version, actual.version);
        EXPECT_EQ(expected.generalFlags, actual.generalFlags);
        EXPECT_EQ(expected.deviceFlags, actual.deviceFlags);
        EXPECT_EQ(expected.count, actual.count);
        EXPECT_EQ(expected.bitSize, actual.bitSize);
        EXPECT_EQ(expected.byteSize, actual.byteSize);
    }
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -103,7 +103,6 @@ class GfxCoreHelper {
|
||||
virtual uint8_t getBarriersCountFromHasBarriers(uint8_t hasBarriers) const = 0;
|
||||
virtual uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const = 0;
|
||||
virtual uint32_t calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const = 0;
|
||||
virtual bool largeGrfModeSupported() const = 0;
|
||||
virtual uint32_t alignSlmSize(uint32_t slmSize) const = 0;
|
||||
virtual uint32_t computeSlmValues(const HardwareInfo &hwInfo, uint32_t slmSize) const = 0;
|
||||
|
||||
@@ -308,7 +307,6 @@ class GfxCoreHelperHw : public GfxCoreHelper {
|
||||
uint32_t calculateAvailableThreadCount(const HardwareInfo &hwInfo, uint32_t grfCount) const override;
|
||||
|
||||
uint32_t calculateMaxWorkGroupSize(const KernelDescriptor &kernelDescriptor, uint32_t defaultMaxGroupSize) const override;
|
||||
bool largeGrfModeSupported() const override;
|
||||
|
||||
uint32_t alignSlmSize(uint32_t slmSize) const override;
|
||||
|
||||
|
||||
@@ -647,11 +647,6 @@ bool GfxCoreHelperHw<GfxFamily>::isChipsetUniqueUUIDSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool GfxCoreHelperHw<GfxFamily>::largeGrfModeSupported() const {
|
||||
return false;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
bool GfxCoreHelperHw<GfxFamily>::isTimestampShiftRequired() const {
|
||||
return true;
|
||||
|
||||
@@ -210,11 +210,6 @@ bool GfxCoreHelperHw<Family>::isChipsetUniqueUUIDSupported() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <>
|
||||
bool GfxCoreHelperHw<Family>::largeGrfModeSupported() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
template <typename GfxFamily>
|
||||
uint32_t GfxCoreHelperHw<GfxFamily>::getKernelPrivateMemSize(const KernelDescriptor &kernelDescriptor) const {
|
||||
const auto &kernelAttributes = kernelDescriptor.kernelAttributes;
|
||||
|
||||
@@ -162,7 +162,7 @@ std::vector<char> createStateSaveAreaHeader(uint32_t version, uint16_t grfNum, u
|
||||
{4096, 1, 256, 32}, // addr
|
||||
{4128, 2, 32, 4}, // flag
|
||||
{4156, 1, 32, 4}, // emask
|
||||
{4160, 2, 128, 16}, // sr
|
||||
{4160, 2, 160, 20}, // sr
|
||||
{4192, 1, 128, 16}, // cr
|
||||
{4256, 1, 96, 12}, // notification
|
||||
{4288, 1, 128, 16}, // tdr
|
||||
|
||||
@@ -19,11 +19,6 @@
|
||||
|
||||
using GfxCoreHelperTestDg1 = GfxCoreHelperTest;
|
||||
|
||||
DG1TEST_F(GfxCoreHelperTestDg1, givenDg1SteppingWhenLargeGrfModeSupportedIsQueriedThenFalseIsReturned) {
    // The DG1 helper is expected to report no large GRF support.
    const bool largeGrfSupported = getHelper<GfxCoreHelper>().largeGrfModeSupported();
    EXPECT_FALSE(largeGrfSupported);
}
|
||||
|
||||
DG1TEST_F(GfxCoreHelperTestDg1, givenDg1SteppingA0WhenAdjustDefaultEngineTypeCalledThenRcsIsReturned) {
|
||||
auto &gfxCoreHelper = getHelper<GfxCoreHelper>();
|
||||
const auto &productHelper = getHelper<ProductHelper>();
|
||||
|
||||
@@ -732,11 +732,6 @@ XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenParamsWhenCalculateNumThre
|
||||
}
|
||||
}
|
||||
|
||||
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenXe2HpgWhenLargeGrfModeSupportedIsQueriedThenTrueIsReturned) {
    // The Xe2 HPG helper is expected to report large GRF support.
    const bool largeGrfSupported = getHelper<GfxCoreHelper>().largeGrfModeSupported();
    EXPECT_TRUE(largeGrfSupported);
}
|
||||
|
||||
XE2_HPG_CORETEST_F(GfxCoreHelperTestsXe2HpgCore, givenGfxCoreHelperWhenFlagSetAndCallGetAmountOfAllocationsToFillThenReturnCorrectValue) {
|
||||
DebugManagerStateRestore restorer;
|
||||
MockExecutionEnvironment mockExecutionEnvironment{};
|
||||
|
||||
Reference in New Issue
Block a user