feature: Report correct GRF register count

Report the correct GRF register count for a thread, depending on
whether Large GRF mode is enabled.

Related-To: NEO-6788
Signed-off-by: Jitendra Sharma <jitendra.sharma@intel.com>
This commit is contained in:
Jitendra Sharma
2023-08-22 17:53:27 +00:00
committed by Compute-Runtime-Automation
parent 0cf60e1ac3
commit 8eb3fe222e
22 changed files with 242 additions and 67 deletions

View File

@@ -110,6 +110,9 @@ struct DebugSession : _zet_debug_session_handle_t {
protected:
DebugSession(const zet_debug_config_t &config, Device *device);
void createEuThreads();
// Patches the GRF entries of an already filled pRegisterSetProperties array
// (*pCount entries) based on a register read from `thread`; does nothing when
// pRegisterSetProperties is null.
void updateGrfRegisterSetProperties(EuThread::ThreadId thread, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties);
virtual uint32_t getRegisterSize(uint32_t type) = 0;
virtual ze_result_t readRegistersImp(EuThread::ThreadId thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) = 0;
virtual void startAsyncThread() = 0;

View File

@@ -1160,6 +1160,40 @@ ze_result_t DebugSessionImp::readSbaRegisters(EuThread::ThreadId threadId, uint3
return ZE_RESULT_SUCCESS;
}
// Adjusts the GRF register count reported in pRegisterSetProperties for one thread.
//
// On devices whose GfxCoreHelper reports large GRF mode support, register 0 of the
// regset returned by L0GfxCoreHelper::getRegsetTypeForLargeGrfDetection() is read for
// `thread`. When the tested bit(s) are clear, every GRF entry in the array has its
// count overridden with 128; otherwise the entries are left as they were filled in.
//
// thread                 - EU thread whose register is inspected.
// pCount                 - number of valid entries in pRegisterSetProperties; dereferenced
//                          unconditionally once the detection register has been read.
// pRegisterSetProperties - entries to patch in place; a null pointer makes the call a no-op.
void DebugSession::updateGrfRegisterSetProperties(EuThread::ThreadId thread, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties) {
    if (pRegisterSetProperties == nullptr) {
        return;
    }

    auto &gfxCoreHelper = this->connectedDevice->getGfxCoreHelper();
    if (!gfxCoreHelper.largeGrfModeSupported()) {
        return;
    }

    auto &l0GfxCoreHelper = connectedDevice->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
    const auto regsetType = l0GfxCoreHelper.getRegsetTypeForLargeGrfDetection();

    // At least 64 bytes are reserved so that both dword 0 and dword 1 can be inspected
    // below regardless of what getRegisterSize() reports.
    const auto regSize = std::max(getRegisterSize(regsetType), 64u);
    // Round up to whole dwords so the buffer is never smaller than regSize bytes.
    const auto regDwords = (regSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
    // make_unique<T[]> value-initializes the elements, so the buffer starts out zeroed.
    auto reg = std::make_unique<uint32_t[]>(regDwords);

    // The result is deliberately not checked: if the read fails the buffer stays zeroed,
    // which is handled as "large GRF not enabled".
    readRegistersImp(thread, regsetType, 0, 1, reg.get());

    bool largeGrfModeEnabled = false;
    if (regsetType == ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU) {
        // CR regset: mask 0x2000 of dword 0.
        largeGrfModeEnabled = (reg[0] & 0x2000) != 0;
    } else if (regsetType == ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU) {
        // SR regset: mask 0x6000 of dword 1.
        largeGrfModeEnabled = (reg[1] & 0x6000) != 0;
    }

    if (largeGrfModeEnabled) {
        return;
    }

    // Large GRF is off for this thread: report 128 GRF registers.
    for (uint32_t i = 0; i < *pCount; i++) {
        if (pRegisterSetProperties[i].type == ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU) {
            pRegisterSetProperties[i].count = 128;
        }
    }
}
ze_result_t DebugSession::getThreadRegisterSetProperties(ze_device_thread_t thread, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties) {
if (!isSingleThread(thread)) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
@@ -1170,7 +1204,13 @@ ze_result_t DebugSession::getThreadRegisterSetProperties(ze_device_thread_t thre
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
return getRegisterSetProperties(this->connectedDevice, pCount, pRegisterSetProperties);
auto ret = getRegisterSetProperties(this->connectedDevice, pCount, pRegisterSetProperties);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
updateGrfRegisterSetProperties(threadId, pCount, pRegisterSetProperties);
return ret;
}
ze_result_t DebugSession::getRegisterSetProperties(Device *device, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties) {
@@ -1193,7 +1233,6 @@ ze_result_t DebugSession::getRegisterSetProperties(Device *device, uint32_t *pCo
auto parseRegsetDesc = [&](const SIP::regset_desc &regsetDesc, zet_debug_regset_type_intel_gpu_t regsetType) {
if (regsetDesc.num) {
if (totalRegsetNum < *pCount) {
uint16_t num = (regsetType == ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU) ? 128 : regsetDesc.num;
zet_debug_regset_properties_t regsetProps = {
ZET_STRUCTURE_TYPE_DEBUG_REGSET_PROPERTIES,
nullptr,
@@ -1201,7 +1240,7 @@ ze_result_t DebugSession::getRegisterSetProperties(Device *device, uint32_t *pCo
0,
DebugSessionImp::typeToRegsetFlags(regsetType),
0,
num,
regsetDesc.num,
regsetDesc.bits,
regsetDesc.bytes,
};

View File

@@ -71,7 +71,7 @@ struct DebugSessionImp : DebugSession {
using ApiEventQueue = std::queue<zet_debug_event_t>;
protected:
MOCKABLE_VIRTUAL ze_result_t readRegistersImp(EuThread::ThreadId thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues);
ze_result_t readRegistersImp(EuThread::ThreadId thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) override;
MOCKABLE_VIRTUAL ze_result_t writeRegistersImp(EuThread::ThreadId thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues);
Error resumeThreadsWithinDevice(uint32_t deviceIndex, ze_device_thread_t physicalThread);
MOCKABLE_VIRTUAL bool writeResumeCommand(const std::vector<EuThread::ThreadId> &threadIds);
@@ -120,7 +120,7 @@ struct DebugSessionImp : DebugSession {
MOCKABLE_VIRTUAL ze_result_t waitForCmdReady(EuThread::ThreadId threadId, uint16_t retryCount);
const SIP::regset_desc *typeToRegsetDesc(uint32_t type);
uint32_t getRegisterSize(uint32_t type);
uint32_t getRegisterSize(uint32_t type) override;
size_t calculateThreadSlotOffset(EuThread::ThreadId threadId);
size_t calculateRegisterOffsetInThreadSlot(const SIP::regset_desc *const regdesc, uint32_t start);

View File

@@ -1,5 +1,5 @@
#
# Copyright (C) 2021-2022 Intel Corporation
# Copyright (C) 2021-2023 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@@ -13,6 +13,7 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/test_debug_api.cpp
${CMAKE_CURRENT_SOURCE_DIR}/debug_session_common.h
${CMAKE_CURRENT_SOURCE_DIR}/debug_session_common.cpp
${CMAKE_CURRENT_SOURCE_DIR}/debug_session_registers_access.h
)
add_subdirectories()

View File

@@ -0,0 +1,61 @@
/*
* Copyright (C) 2022-2023 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
#include "level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h"
#include "common/StateSaveAreaHeader.h"
namespace L0 {
namespace ult {
// Test fixture that builds a MockDebugSession on top of a mock device and marks one
// EU thread (stoppedThreadId) as stopped, so register access paths can be exercised.
struct DebugSessionRegistersAccess {
    // Creates the mock NEO device, the L0 device wrapper and the debug session,
    // then moves stoppedThreadId into the stopped/reported state.
    void setUp() {
        zet_debug_config_t config = {};
        config.pid = 0x1234;
        auto hwInfo = *NEO::defaultHwInfo.get();

        neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0);
        deviceImp = std::make_unique<Mock<L0::DeviceImp>>(neoDevice, neoDevice->getExecutionEnvironment());

        session = std::make_unique<MockDebugSession>(config, deviceImp.get());

        session->allThreads[stoppedThreadId]->stopThread(1u);
        session->allThreads[stoppedThreadId]->reportAsStopped();
    }

    void tearDown() {
    }

    // For every thread index below regHeader.num_threads_per_eu, writes an sr_ident
    // record (count = 1, major version copied from the header) at that thread slot's
    // sr_magic_offset inside the session's state save area. Does nothing while the
    // state save area header is empty.
    void dumpRegisterState() {
        if (session->stateSaveAreaHeader.size() == 0) {
            return;
        }
        auto pStateSaveAreaHeader = reinterpret_cast<SIP::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data());

        for (uint32_t thread = 0; thread < pStateSaveAreaHeader->regHeader.num_threads_per_eu; thread++) {
            EuThread::ThreadId threadId(0, 0, 0, 0, thread);
            auto threadSlotOffset = session->calculateThreadSlotOffset(threadId);

            auto srMagicOffset = threadSlotOffset + pStateSaveAreaHeader->regHeader.sr_magic_offset;
            // Value-initialize so the fields not set below are written as zeros
            // instead of indeterminate stack contents.
            SIP::sr_ident srMagic = {};
            srMagic.count = 1;
            srMagic.version.major = pStateSaveAreaHeader->versionHeader.version.major;

            session->writeGpuMemory(0, reinterpret_cast<char *>(&srMagic), sizeof(srMagic), reinterpret_cast<uint64_t>(pStateSaveAreaHeader) + srMagicOffset);
        }
    }

    ze_device_thread_t stoppedThread = {0, 0, 0, 0};
    EuThread::ThreadId stoppedThreadId{0, stoppedThread};

    std::unique_ptr<MockDebugSession> session;
    std::unique_ptr<Mock<L0::DeviceImp>> deviceImp;
    // Raw pointer handed to deviceImp in setUp(); presumably owned by it - confirm.
    NEO::MockDevice *neoDevice = nullptr;
};
} // namespace ult
} // namespace L0

View File

@@ -20,7 +20,7 @@
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/include/zet_intel_gpu_debug.h"
#include "level_zero/tools/source/debug/debug_session_imp.h"
#include "level_zero/tools/test/unit_tests/sources/debug/mock_debug_session.h"
#include "level_zero/tools/test/unit_tests/sources/debug/debug_session_registers_access.h"
#include "common/StateSaveAreaHeader.h"
#include "encode_surface_state_args.h"
@@ -2228,50 +2228,6 @@ TEST_F(MultiTileDebugSessionTest, GivenMultitileDeviceWhenCallingAreRequestedThr
EXPECT_TRUE(stopped);
}
// Test fixture that builds a MockDebugSession on top of a mock device and marks one
// EU thread (stoppedThreadId) as stopped, so register access paths can be exercised.
struct DebugSessionRegistersAccess {
    // Creates the mock NEO device, the L0 device wrapper and the debug session,
    // then moves stoppedThreadId into the stopped/reported state.
    void setUp() {
        zet_debug_config_t config = {};
        config.pid = 0x1234;
        auto hwInfo = *NEO::defaultHwInfo.get();

        neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0);
        deviceImp = std::make_unique<Mock<L0::DeviceImp>>(neoDevice, neoDevice->getExecutionEnvironment());

        session = std::make_unique<MockDebugSession>(config, deviceImp.get());

        session->allThreads[stoppedThreadId]->stopThread(1u);
        session->allThreads[stoppedThreadId]->reportAsStopped();
    }

    void tearDown() {
    }

    // For every thread index below regHeader.num_threads_per_eu, writes an sr_ident
    // record (count = 1, major version copied from the header) at that thread slot's
    // sr_magic_offset inside the session's state save area. Does nothing while the
    // state save area header is empty.
    void dumpRegisterState() {
        if (session->stateSaveAreaHeader.size() == 0) {
            return;
        }
        auto pStateSaveAreaHeader = reinterpret_cast<SIP::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data());

        for (uint32_t thread = 0; thread < pStateSaveAreaHeader->regHeader.num_threads_per_eu; thread++) {
            EuThread::ThreadId threadId(0, 0, 0, 0, thread);
            auto threadSlotOffset = session->calculateThreadSlotOffset(threadId);

            auto srMagicOffset = threadSlotOffset + pStateSaveAreaHeader->regHeader.sr_magic_offset;
            // Value-initialize so the fields not set below are written as zeros
            // instead of indeterminate stack contents.
            SIP::sr_ident srMagic = {};
            srMagic.count = 1;
            srMagic.version.major = pStateSaveAreaHeader->versionHeader.version.major;

            session->writeGpuMemory(0, reinterpret_cast<char *>(&srMagic), sizeof(srMagic), reinterpret_cast<uint64_t>(pStateSaveAreaHeader) + srMagicOffset);
        }
    }

    ze_device_thread_t stoppedThread = {0, 0, 0, 0};
    EuThread::ThreadId stoppedThreadId{0, stoppedThread};

    std::unique_ptr<MockDebugSession> session;
    std::unique_ptr<Mock<L0::DeviceImp>> deviceImp;
    // Raw pointer handed to deviceImp in setUp(); presumably owned by it - confirm.
    NEO::MockDevice *neoDevice = nullptr;
};
using DebugSessionRegistersAccessTest = Test<DebugSessionRegistersAccess>;
TEST_F(DebugSessionRegistersAccessTest, givenTypeToRegsetDescCalledThenCorrectRegdescIsReturned) {
@@ -2319,6 +2275,72 @@ TEST_F(DebugSessionRegistersAccessTest, givenGetThreadRegisterSetPropertiesCalle
EXPECT_EQ(ZE_RESULT_ERROR_NOT_AVAILABLE, zetDebugGetThreadRegisterSetProperties(session->toHandle(), stoppedThread, &threadCount, nullptr));
}
// A non-zero count combined with a null output array must be rejected with
// ZE_RESULT_ERROR_INVALID_NULL_POINTER.
TEST_F(DebugSessionRegistersAccessTest,
       givenNonZeroCountAndNullRegsetPointerWhenGetThreadRegisterSetPropertiesCalledTheniInvalidNullPointerIsReturned) {
    uint32_t requestedCount = 10;
    const ze_device_thread_t queriedThread = stoppedThread;

    const auto result = zetDebugGetThreadRegisterSetProperties(session->toHandle(), queriedThread, &requestedCount, nullptr);

    EXPECT_EQ(ZE_RESULT_ERROR_INVALID_NULL_POINTER, result);
}
// With the 0x2000 bit set in dword 0 of the thread's CR register, the per-thread
// register set properties must report the full 256 GRF registers.
HWTEST2_F(DebugSessionRegistersAccessTest,
          givenGetThreadRegisterSetPropertiesCalledWhenLargeGrfIsSetThen256GrfRegisterCountIsReported,
          IsXeHpOrXeHpcOrXeHpgCore) {
    auto mockBuiltins = new MockBuiltins();
    mockBuiltins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2, 256);
    neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltins);

    {
        // Grow the session's copy of the state save area beyond the header so that
        // register data can be stored behind it.
        auto pStateSaveAreaHeader = reinterpret_cast<SIP::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data());
        auto size = pStateSaveAreaHeader->versionHeader.size * 8 +
                    pStateSaveAreaHeader->regHeader.state_area_offset +
                    pStateSaveAreaHeader->regHeader.state_save_size * 16;
        session->stateSaveAreaHeader.resize(size);
    }

    ze_device_thread_t thread = stoppedThread;

    // Re-fetch the header pointer: resize() above may have reallocated the buffer.
    auto *regdesc = &(reinterpret_cast<SIP::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data()))->regHeader.cr;
    uint32_t cr0[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    cr0[0] = 0x80002000; // includes the 0x2000 bit checked for the CR regset
    // Store cr0 for the stopped thread (final argument presumably selects a write - confirm).
    session->registersAccessHelper(session->allThreads[stoppedThreadId].get(), regdesc, 0, 1, cr0, true);

    uint32_t threadCount = 0;
    // Fatal assertions: the vector below is indexed at [0], so the count query must
    // have succeeded and returned at least one register set.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, nullptr));
    ASSERT_NE(0u, threadCount);

    std::vector<zet_debug_regset_properties_t> threadRegsetProps(threadCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, threadRegsetProps.data()));
    EXPECT_EQ(256u, threadRegsetProps[0].count);
}
// With the 0x2000 bit clear in dword 0 of the thread's CR register, the per-thread
// register set properties must report 128 GRF registers even though the SIP header
// was created with 256.
HWTEST2_F(DebugSessionRegistersAccessTest,
          givenGetThreadRegisterSetPropertiesCalledWhenLargeGrfIsNotSetThen128GrfRegisterCountIsReported,
          IsXeHpOrXeHpcOrXeHpgCore) {
    auto mockBuiltins = new MockBuiltins();
    mockBuiltins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2, 256);
    neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltins);

    {
        // Grow the session's copy of the state save area beyond the header so that
        // register data can be stored behind it.
        auto pStateSaveAreaHeader = reinterpret_cast<SIP::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data());
        auto size = pStateSaveAreaHeader->versionHeader.size * 8 +
                    pStateSaveAreaHeader->regHeader.state_area_offset +
                    pStateSaveAreaHeader->regHeader.state_save_size * 16;
        session->stateSaveAreaHeader.resize(size);
    }

    ze_device_thread_t thread = stoppedThread;

    // Re-fetch the header pointer: resize() above may have reallocated the buffer.
    auto *regdesc = &(reinterpret_cast<SIP::StateSaveAreaHeader *>(session->stateSaveAreaHeader.data()))->regHeader.cr;
    uint32_t cr0[8] = {0, 0, 0, 0, 0, 0, 0, 0};
    cr0[0] = 0x80000000; // the 0x2000 bit checked for the CR regset stays clear
    // Store cr0 for the stopped thread (final argument presumably selects a write - confirm).
    session->registersAccessHelper(session->allThreads[stoppedThreadId].get(), regdesc, 0, 1, cr0, true);

    uint32_t threadCount = 0;
    // Fatal assertions: the vector below is indexed at [0], so the count query must
    // have succeeded and returned at least one register set.
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, nullptr));
    ASSERT_NE(0u, threadCount);

    std::vector<zet_debug_regset_properties_t> threadRegsetProps(threadCount);
    ASSERT_EQ(ZE_RESULT_SUCCESS, zetDebugGetThreadRegisterSetProperties(session->toHandle(), thread, &threadCount, threadRegsetProps.data()));
    EXPECT_EQ(128u, threadRegsetProps[0].count);
}
TEST_F(DebugSessionRegistersAccessTest, givenGetThreadRegisterSetPropertiesCalledPropertieAreTheSameAsgetRegisterSetProperties) {
auto mockBuiltins = new MockBuiltins();

View File

@@ -116,6 +116,14 @@ struct DebugSessionMock : public L0::DebugSession {
return true;
}
// Stub for the register read hook: ignores all arguments, leaves pRegisterValues
// untouched and always returns ZE_RESULT_ERROR_UNSUPPORTED_FEATURE.
ze_result_t readRegistersImp(EuThread::ThreadId thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) override {
return ZE_RESULT_ERROR_UNSUPPORTED_FEATURE;
}
// Stub for the register size hook: returns 0 for every regset type.
uint32_t getRegisterSize(uint32_t type) override {
return 0;
}
void detachTileDebugSession(DebugSession *tileSession) override {}
bool areAllTileDebugSessionDetached() override { return true; }

View File

@@ -334,21 +334,6 @@ TEST_F(DebugApiTest, givenSIPHeaderHasZeroSizeMMEThenNotExposedAsRegset) {
}
}
// Device-level query with a SIP header created via createStateSaveAreaHeader(2, 256):
// expects 13 register sets and a count of 128 for the first one.
TEST_F(DebugApiTest, givenSIPHeaderGRFCountNotEqualTo128ThenGetRegisterSetPropertiesReturns128) {
    mockBuiltins = new MockBuiltins();
    mockBuiltins->stateSaveAreaHeader = MockSipData::createStateSaveAreaHeader(2, 256);
    neoDevice->executionEnvironment->rootDeviceEnvironments[0]->builtins.reset(mockBuiltins);

    // First call: query only the number of register sets.
    uint32_t regsetCount = 0;
    const auto queryResult = zetDebugGetRegisterSetProperties(device->toHandle(), &regsetCount, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, queryResult);
    EXPECT_EQ(13u, regsetCount);

    // Second call: fetch the properties themselves.
    std::vector<zet_debug_regset_properties_t> properties(regsetCount);
    const auto fetchResult = zetDebugGetRegisterSetProperties(device->toHandle(), &regsetCount, properties.data());
    EXPECT_EQ(ZE_RESULT_SUCCESS, fetchResult);
    EXPECT_EQ(13u, regsetCount);
    EXPECT_EQ(128u, properties[0].count);
}
TEST_F(DebugApiTest, givenGetRegisterSetPropertiesCalledCorrectPropertiesReturned) {
uint32_t count = 0;
EXPECT_EQ(ZE_RESULT_SUCCESS, zetDebugGetRegisterSetProperties(device->toHandle(), &count, nullptr));