compute-runtime/level_zero/tools/source/debug/debug_session_imp.cpp
Yates, Brandon 66581a0a1d L0 Debug - Avoid SSAH lookup when no threads are stopped
During async thread event processing, it was possible to
read the SSAH (state save area header) before any threads
had stopped and before it was resident, resulting in an
assert. This fixes the assertion and is also a minor
optimization.

Signed-off-by: Yates, Brandon <brandon.yates@intel.com>
2022-12-13 03:07:22 +01:00


/*
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/tools/source/debug/debug_session_imp.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/hw_helper.h"
#include "shared/source/helpers/sleep.h"
#include "shared/source/helpers/string.h"
#include "shared/source/os_interface/os_interface.h"
#include "level_zero/core/source/device/device_imp.h"
#include "level_zero/core/source/hw_helpers/l0_hw_helper.h"
#include "level_zero/include/zet_intel_gpu_debug.h"
namespace L0 {
DebugSession::DebugSession(const zet_debug_config_t &config, Device *device) : connectedDevice(device), config(config) {
}
const NEO::TopologyMap &DebugSession::getTopologyMap() {
return connectedDevice->getOsInterface().getDriverModel()->getTopologyMap();
}
void DebugSession::createEuThreads() {
if (connectedDevice) {
bool isSubDevice = connectedDevice->getNEODevice()->isSubDevice();
auto &hwInfo = connectedDevice->getHwInfo();
const uint32_t numSubslicesPerSlice = std::max(hwInfo.gtSystemInfo.MaxSubSlicesSupported, hwInfo.gtSystemInfo.MaxDualSubSlicesSupported) / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
uint32_t subDeviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices());
UNRECOVERABLE_IF(isSubDevice && subDeviceCount > 1);
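// Root devices iterate over all tiles; a subdevice (or a single-tile device)
// derives its tile index from the device bitfield and runs the loop body once
// (see the early break below).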
for (uint32_t tileIndex = 0; tileIndex < subDeviceCount; tileIndex++) {
if (isSubDevice || subDeviceCount == 1) {
tileIndex = Math::log2(static_cast<uint32_t>(connectedDevice->getNEODevice()->getDeviceBitfield().to_ulong()));
}
for (uint32_t sliceID = 0; sliceID < hwInfo.gtSystemInfo.MaxSlicesSupported; sliceID++) {
for (uint32_t subsliceID = 0; subsliceID < numSubslicesPerSlice; subsliceID++) {
for (uint32_t euID = 0; euID < numEuPerSubslice; euID++) {
for (uint32_t threadID = 0; threadID < numThreadsPerEu; threadID++) {
EuThread::ThreadId thread = {tileIndex, sliceID, subsliceID, euID, threadID};
allThreads[uint64_t(thread)] = std::make_unique<EuThread>(thread);
}
}
}
}
if (isSubDevice || subDeviceCount == 1) {
break;
}
}
}
}
uint32_t DebugSession::getDeviceIndexFromApiThread(ze_device_thread_t thread) {
auto deviceBitfield = connectedDevice->getNEODevice()->getDeviceBitfield();
uint32_t deviceIndex = Math::log2(static_cast<uint32_t>(deviceBitfield.to_ulong()));
auto deviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices());
const auto &topologyMap = getTopologyMap();
if (connectedDevice->getNEODevice()->isSubDevice()) {
return deviceIndex;
}
if (deviceCount > 1) {
if (thread.slice == UINT32_MAX) {
deviceIndex = UINT32_MAX;
} else {
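// API slice IDs are contiguous across tiles: walk the topology map, subtracting
// each tile's slice count, until the requested slice falls within a tile's range.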
uint32_t sliceId = thread.slice;
for (uint32_t i = 0; i < topologyMap.size(); i++) {
if (deviceBitfield.test(i)) {
if (sliceId < topologyMap.at(i).sliceIndices.size()) {
deviceIndex = i;
}
sliceId = sliceId - static_cast<uint32_t>(topologyMap.at(i).sliceIndices.size());
}
}
}
}
return deviceIndex;
}
ze_device_thread_t DebugSession::convertToPhysicalWithinDevice(ze_device_thread_t thread, uint32_t deviceIndex) {
auto deviceImp = static_cast<DeviceImp *>(connectedDevice);
const auto &topologyMap = getTopologyMap();
// set slice for single slice config to allow subslice remapping
auto mapping = topologyMap.find(deviceIndex);
if (thread.slice == UINT32_MAX && mapping != topologyMap.end() && mapping->second.sliceIndices.size() == 1) {
thread.slice = 0;
}
if (thread.slice != UINT32_MAX) {
if (thread.subslice != UINT32_MAX) {
deviceImp->toPhysicalSliceId(topologyMap, thread.slice, thread.subslice, deviceIndex);
} else {
uint32_t dummy = 0;
deviceImp->toPhysicalSliceId(topologyMap, thread.slice, dummy, deviceIndex);
}
}
return thread;
}
EuThread::ThreadId DebugSession::convertToThreadId(ze_device_thread_t thread) {
auto deviceImp = static_cast<DeviceImp *>(connectedDevice);
UNRECOVERABLE_IF(!DebugSession::isSingleThread(thread));
uint32_t deviceIndex = 0;
deviceImp->toPhysicalSliceId(getTopologyMap(), thread.slice, thread.subslice, deviceIndex);
EuThread::ThreadId threadId(deviceIndex, thread.slice, thread.subslice, thread.eu, thread.thread);
return threadId;
}
ze_device_thread_t DebugSession::convertToApi(EuThread::ThreadId threadId) {
auto deviceImp = static_cast<DeviceImp *>(connectedDevice);
ze_device_thread_t thread = {static_cast<uint32_t>(threadId.slice), static_cast<uint32_t>(threadId.subslice), static_cast<uint32_t>(threadId.eu), static_cast<uint32_t>(threadId.thread)};
deviceImp->toApiSliceId(getTopologyMap(), thread.slice, thread.subslice, threadId.tileIndex);
return thread;
}
std::vector<EuThread::ThreadId> DebugSession::getSingleThreadsForDevice(uint32_t deviceIndex, ze_device_thread_t physicalThread, const NEO::HardwareInfo &hwInfo) {
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
UNRECOVERABLE_IF(numThreadsPerEu > 8);
std::vector<EuThread::ThreadId> threads;
const uint32_t slice = physicalThread.slice;
const uint32_t subslice = physicalThread.subslice;
const uint32_t eu = physicalThread.eu;
const uint32_t thread = physicalThread.thread;
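// UINT32_MAX in any field acts as a wildcard: a concrete value pins the
// corresponding loop to a single iteration (note the matching breaks below).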
for (uint32_t sliceID = 0; sliceID < hwInfo.gtSystemInfo.MaxSlicesSupported; sliceID++) {
if (slice != UINT32_MAX) {
sliceID = slice;
}
for (uint32_t subsliceID = 0; subsliceID < numSubslicesPerSlice; subsliceID++) {
if (subslice != UINT32_MAX) {
subsliceID = subslice;
}
for (uint32_t euID = 0; euID < numEuPerSubslice; euID++) {
if (eu != UINT32_MAX) {
euID = eu;
}
for (uint32_t threadID = 0; threadID < numThreadsPerEu; threadID++) {
if (thread != UINT32_MAX) {
threadID = thread;
}
threads.push_back({deviceIndex, sliceID, subsliceID, euID, threadID});
if (thread != UINT32_MAX) {
break;
}
}
if (eu != UINT32_MAX) {
break;
}
}
if (subslice != UINT32_MAX) {
break;
}
}
if (slice != UINT32_MAX) {
break;
}
}
return threads;
}
bool DebugSession::areRequestedThreadsStopped(ze_device_thread_t thread) {
auto &hwInfo = connectedDevice->getHwInfo();
bool requestedThreadsStopped = true;
auto deviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices());
uint32_t deviceIndex = getDeviceIndexFromApiThread(thread);
auto areAllThreadsStopped = [this, &hwInfo](uint32_t deviceIndex, const ze_device_thread_t &thread) -> bool {
auto physicalThread = convertToPhysicalWithinDevice(thread, deviceIndex);
auto singleThreads = getSingleThreadsForDevice(deviceIndex, physicalThread, hwInfo);
for (auto &threadId : singleThreads) {
if (allThreads[threadId]->isStopped()) {
continue;
}
return false;
}
return true;
};
if (deviceIndex != UINT32_MAX) {
return areAllThreadsStopped(deviceIndex, thread);
}
for (uint32_t i = 0; i < deviceCount; i++) {
if (areAllThreadsStopped(i, thread) == false) {
return false;
}
}
return requestedThreadsStopped;
}
ze_result_t DebugSession::sanityMemAccessThreadCheck(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc) {
if (DebugSession::isThreadAll(thread)) {
if (desc->type != ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
} else {
return ZE_RESULT_SUCCESS;
}
} else if (DebugSession::isSingleThread(thread)) {
if (!areRequestedThreadsStopped(thread)) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
} else {
return ZE_RESULT_SUCCESS;
}
}
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
void DebugSession::fillDevicesFromThread(ze_device_thread_t thread, std::vector<uint8_t> &devices) {
auto deviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices());
UNRECOVERABLE_IF(devices.size() < deviceCount);
uint32_t deviceIndex = getDeviceIndexFromApiThread(thread);
bool singleDevice = (thread.slice != UINT32_MAX && deviceCount > 1) || deviceCount == 1;
if (singleDevice) {
devices[deviceIndex] = 1;
} else {
for (uint32_t i = 0; i < deviceCount; i++) {
devices[i] = 1;
}
}
}
bool DebugSession::isBindlessSystemRoutine() {
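// Bit 0 of the debug area's reserved field marks a bindless system routine.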
if (debugArea.reserved1 & 1) {
return true;
}
return false;
}
size_t DebugSession::getPerThreadScratchOffset(size_t ptss, EuThread::ThreadId threadId) {
auto &hwInfo = connectedDevice->getHwInfo();
const uint32_t numSubslicesPerSlice = hwInfo.gtSystemInfo.MaxSubSlicesSupported / hwInfo.gtSystemInfo.MaxSlicesSupported;
const uint32_t numEuPerSubslice = hwInfo.gtSystemInfo.MaxEuPerSubSlice;
const uint32_t numThreadsPerEu = (hwInfo.gtSystemInfo.ThreadCount / hwInfo.gtSystemInfo.EUCount);
const auto &hwInfoConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
uint32_t threadEuRatio = hwInfoConfig.getThreadEuRatioForScratch(hwInfo);
uint32_t multiplyFactor = 1;
if (threadEuRatio / numThreadsPerEu > 1) {
multiplyFactor = threadEuRatio / numThreadsPerEu;
}
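// Linearize (slice, subslice, eu, thread) into a thread index and scale it by
// the per-thread scratch size; multiplyFactor widens the per-EU stride when the
// platform's scratch thread/EU ratio exceeds the actual thread count per EU.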
auto threadOffset = (((threadId.slice * numSubslicesPerSlice + threadId.subslice) * numEuPerSubslice + threadId.eu) * numThreadsPerEu * multiplyFactor + threadId.thread) * ptss;
return threadOffset;
}
void DebugSession::printBitmask(uint8_t *bitmask, size_t bitmaskSize) {
if (NEO::DebugManager.flags.DebuggerLogBitmask.get() & NEO::DebugVariables::DEBUGGER_LOG_BITMASK::LOG_INFO) {
DEBUG_BREAK_IF(bitmaskSize % sizeof(uint64_t) != 0);
PRINT_DEBUGGER_LOG(stdout, "\nINFO: Bitmask: ", "");
for (size_t i = 0; i < bitmaskSize / sizeof(uint64_t); i++) {
uint64_t bitmask64 = 0;
memcpy_s(&bitmask64, sizeof(uint64_t), &bitmask[i * sizeof(uint64_t)], sizeof(uint64_t));
PRINT_DEBUGGER_LOG(stdout, "\n [%lu] = %#018" PRIx64, static_cast<uint64_t>(i), bitmask64);
}
}
}
DebugSession *DebugSessionImp::attachTileDebugSession(Device *device) {
std::unique_lock<std::mutex> lock(asyncThreadMutex);
uint32_t subDeviceIndex = Math::log2(static_cast<uint32_t>(device->getNEODevice()->getDeviceBitfield().to_ulong()));
auto &[tileSession, attached] = tileSessions[subDeviceIndex];
if (attached) {
return nullptr;
}
tileSessions[subDeviceIndex].first->attachTile();
attached = true;
PRINT_DEBUGGER_INFO_LOG("TileDebugSession attached, deviceIndex = %lu\n", subDeviceIndex);
return tileSession;
}
void DebugSessionImp::detachTileDebugSession(DebugSession *tileSession) {
std::unique_lock<std::mutex> lock(asyncThreadMutex);
uint32_t subDeviceIndex = Math::log2(static_cast<uint32_t>(tileSession->getConnectedDevice()->getNEODevice()->getDeviceBitfield().to_ulong()));
tileSessions[subDeviceIndex].second = false;
tileSessions[subDeviceIndex].first->detachTile();
cleanRootSessionAfterDetach(subDeviceIndex);
PRINT_DEBUGGER_INFO_LOG("TileDebugSession detached, deviceIndex = %lu\n", subDeviceIndex);
}
bool DebugSessionImp::areAllTileDebugSessionDetached() {
for (const auto &session : tileSessions) {
if (session.second == true) {
return false;
}
}
return true;
}
ze_result_t DebugSessionImp::interrupt(ze_device_thread_t thread) {
if (areRequestedThreadsStopped(thread)) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
{
std::unique_lock<std::mutex> lock(interruptMutex);
for (auto &previousThread : pendingInterrupts) {
if (areThreadsEqual(thread, previousThread.first)) {
return ZE_RESULT_NOT_READY;
}
}
interruptRequests.push(thread);
}
return ZE_RESULT_SUCCESS;
}
DebugSessionImp::Error DebugSessionImp::resumeThreadsWithinDevice(uint32_t deviceIndex, ze_device_thread_t physicalThread) {
auto &hwInfo = connectedDevice->getHwInfo();
bool allThreadsRunning = true;
auto singleThreads = getSingleThreadsForDevice(deviceIndex, physicalThread, hwInfo);
Error retVal = Error::Unknown;
std::vector<ze_device_thread_t> resumeThreads;
std::vector<EuThread::ThreadId> resumeThreadIds;
for (auto &threadId : singleThreads) {
if (allThreads[threadId]->isRunning()) {
continue;
}
allThreadsRunning = false;
resumeThreads.emplace_back(ze_device_thread_t{static_cast<uint32_t>(threadId.slice), static_cast<uint32_t>(threadId.subslice), static_cast<uint32_t>(threadId.eu), static_cast<uint32_t>(threadId.thread)});
resumeThreadIds.push_back(threadId);
}
if (allThreadsRunning) {
return Error::ThreadsRunning;
}
std::unique_lock<std::mutex> lock(threadStateMutex);
[[maybe_unused]] auto sipCommandResult = writeResumeCommand(resumeThreadIds);
DEBUG_BREAK_IF(sipCommandResult != true);
auto result = resumeImp(resumeThreadIds, deviceIndex);
for (auto &threadID : resumeThreadIds) {
while (checkThreadIsResumed(threadID) == false)
;
allThreads[threadID]->resumeThread();
}
if (sipCommandResult && result == ZE_RESULT_SUCCESS) {
retVal = Error::Success;
}
return retVal;
}
void DebugSessionImp::applyResumeWa(uint8_t *bitmask, size_t bitmaskSize) {
UNRECOVERABLE_IF(bitmaskSize % 8 != 0);
auto &l0GfxCoreHelper = connectedDevice->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
if (l0GfxCoreHelper.isResumeWARequired()) {
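// Mirror each pair of dwords so that, after the loop, both halves of every
// 64-bit bitmask entry carry the OR of the pair.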
uint32_t *dwordBitmask = reinterpret_cast<uint32_t *>(bitmask);
for (uint32_t i = 0; i < bitmaskSize / sizeof(uint32_t) - 1; i = i + 2) {
dwordBitmask[i] = dwordBitmask[i] | dwordBitmask[i + 1];
dwordBitmask[i + 1] = dwordBitmask[i];
}
}
return;
}
bool DebugSessionImp::writeResumeCommand(const std::vector<EuThread::ThreadId> &threadIds) {
auto stateSaveAreaHeader = getStateSaveAreaHeader();
bool success = true;
if (stateSaveAreaHeader->versionHeader.version.major < 2u) {
auto &hwInfo = connectedDevice->getHwInfo();
auto &l0GfxCoreHelper = connectedDevice->getNEODevice()->getRootDeviceEnvironment().getHelper<L0GfxCoreHelper>();
if (l0GfxCoreHelper.isResumeWARequired()) {
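// SIP versions below 2 have no command register path: resume is signalled by
// setting bit 30 (0x40000000) in dword 1 of cr0 for bindless SIP, or in
// dword 4 of r0 for bindful SIP.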
constexpr uint32_t sipResumeValue = 0x40000000;
bool isBindlessSip = (debugArea.reserved1 == 1);
auto registerType = ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU;
uint32_t dword = 1;
if (!isBindlessSip) {
registerType = ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU;
dword = 4;
}
const auto regSize = std::max(getRegisterSize(registerType), hwInfo.capabilityTable.grfSize);
auto reg = std::make_unique<uint32_t[]>(regSize / sizeof(uint32_t));
for (auto &threadID : threadIds) {
memset(reg.get(), 0, regSize);
if (readRegistersImp(threadID, registerType, 0, 1, reg.get()) != ZE_RESULT_SUCCESS) {
success = false;
} else {
reg[dword] |= sipResumeValue;
if (writeRegistersImp(threadID, registerType, 0, 1, reg.get()) != ZE_RESULT_SUCCESS) {
success = false;
}
}
}
}
} else { // version >= 2u: resume via the SIP command register
SIP::sip_command resumeCommand = {0};
resumeCommand.command = static_cast<uint32_t>(NEO::SipKernel::COMMAND::RESUME);
for (auto &threadID : threadIds) {
ze_result_t result = cmdRegisterAccessHelper(threadID, resumeCommand, true);
if (result != ZE_RESULT_SUCCESS) {
success = false;
}
}
}
return success;
}
bool DebugSessionImp::checkThreadIsResumed(const EuThread::ThreadId &threadID) {
auto stateSaveAreaHeader = getStateSaveAreaHeader();
bool resumed = true;
if (stateSaveAreaHeader->versionHeader.version.major >= 2u) {
SIP::sr_ident srMagic = {{0}};
const auto thread = allThreads[threadID].get();
if (!readSystemRoutineIdent(thread, thread->getMemoryHandle(), srMagic)) {
return resumed;
}
PRINT_DEBUGGER_THREAD_LOG("checkThreadIsResumed - Read counter for thread %s, counter == %d\n", EuThread::toString(threadID).c_str(), (int)srMagic.count);
// Counter greater than last one means thread was resumed
if (srMagic.count == thread->getLastCounter()) {
resumed = false;
}
}
return resumed;
}
ze_result_t DebugSessionImp::resume(ze_device_thread_t thread) {
auto deviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices());
bool singleDevice = (thread.slice != UINT32_MAX && deviceCount > 1) || deviceCount == 1;
ze_result_t retVal = ZE_RESULT_SUCCESS;
if (singleDevice) {
uint32_t deviceIndex = Math::log2(static_cast<uint32_t>(connectedDevice->getNEODevice()->getDeviceBitfield().to_ulong()));
if (connectedDevice->getNEODevice()->isSubDevice()) {
deviceIndex = Math::log2(static_cast<uint32_t>(connectedDevice->getNEODevice()->getDeviceBitfield().to_ulong()));
} else {
if (thread.slice != UINT32_MAX) {
deviceIndex = getDeviceIndexFromApiThread(thread);
}
}
auto physicalThread = convertToPhysicalWithinDevice(thread, deviceIndex);
auto result = resumeThreadsWithinDevice(deviceIndex, physicalThread);
if (result == Error::ThreadsRunning) {
retVal = ZE_RESULT_ERROR_NOT_AVAILABLE;
} else if (result != Error::Success) {
return ZE_RESULT_ERROR_UNKNOWN;
}
} else {
bool allThreadsRunning = true;
for (uint32_t deviceId = 0; deviceId < deviceCount; deviceId++) {
auto physicalThread = convertToPhysicalWithinDevice(thread, deviceId);
auto result = resumeThreadsWithinDevice(deviceId, physicalThread);
if (result == Error::ThreadsRunning) {
continue;
} else if (result != Error::Success) {
retVal = ZE_RESULT_ERROR_UNKNOWN;
}
allThreadsRunning = false;
}
if (allThreadsRunning) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
}
return retVal;
}
void DebugSessionImp::sendInterrupts() {
if (interruptSent) {
return;
}
{
std::unique_lock<std::mutex> lock(interruptMutex);
while (interruptRequests.size() > 0) {
auto thread = interruptRequests.front();
pendingInterrupts.push_back(std::pair<ze_device_thread_t, bool>(thread, false));
interruptRequests.pop();
}
}
if (pendingInterrupts.size() == 0) {
return;
}
expectedAttentionEvents = 0;
auto deviceCount = std::max(1u, connectedDevice->getNEODevice()->getNumSubDevices());
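// Single-tile devices issue one interrupt covering every pending request;
// multi-tile devices interrupt each tile referenced by a pending request and
// count the attention events expected back.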
if (deviceCount == 1) {
uint32_t deviceIndex = Math::log2(static_cast<uint32_t>(connectedDevice->getNEODevice()->getDeviceBitfield().to_ulong()));
ze_result_t result;
{
std::unique_lock<std::mutex> lock(threadStateMutex);
result = interruptImp(deviceIndex);
}
if (result == ZE_RESULT_SUCCESS) {
interruptTime = std::chrono::high_resolution_clock::now();
interruptSent = true;
} else {
zet_debug_event_t debugEvent = {};
debugEvent.type = ZET_DEBUG_EVENT_TYPE_THREAD_UNAVAILABLE;
for (auto &request : pendingInterrupts) {
debugEvent.info.thread.thread = request.first;
enqueueApiEvent(debugEvent);
}
{
std::unique_lock<std::mutex> lock(interruptMutex);
pendingInterrupts.clear();
}
}
} else {
std::vector<uint8_t> devices(deviceCount);
for (auto &request : pendingInterrupts) {
auto thread = request.first;
fillDevicesFromThread(thread, devices);
}
std::vector<ze_result_t> results(deviceCount);
for (uint32_t i = 0; i < deviceCount; i++) {
if (devices[i]) {
std::unique_lock<std::mutex> lock(threadStateMutex);
results[i] = interruptImp(i);
if (results[i] == ZE_RESULT_SUCCESS) {
expectedAttentionEvents++;
}
} else {
results[i] = ZE_RESULT_SUCCESS;
}
}
const bool allFailed = std::all_of(results.begin(), results.end(),
[](const auto &result) { return result != ZE_RESULT_SUCCESS; });
PRINT_DEBUGGER_INFO_LOG("Successful interrupt requests = %u \n", expectedAttentionEvents);
if (allFailed) {
zet_debug_event_t debugEvent = {};
debugEvent.type = ZET_DEBUG_EVENT_TYPE_THREAD_UNAVAILABLE;
for (auto &request : pendingInterrupts) {
debugEvent.info.thread.thread = request.first;
enqueueApiEvent(debugEvent);
}
expectedAttentionEvents = 0;
{
std::unique_lock<std::mutex> lock(interruptMutex);
pendingInterrupts.clear();
}
} else {
interruptTime = std::chrono::high_resolution_clock::now();
interruptSent = true;
}
}
}
bool DebugSessionImp::readSystemRoutineIdent(EuThread *thread, uint64_t memoryHandle, SIP::sr_ident &srIdent) {
auto stateSaveAreaHeader = getStateSaveAreaHeader();
if (!stateSaveAreaHeader) {
return false;
}
auto gpuVa = getContextStateSaveAreaGpuVa(memoryHandle);
if (gpuVa == 0) {
return false;
}
auto threadSlotOffset = calculateThreadSlotOffset(thread->getThreadId());
auto srMagicOffset = threadSlotOffset + stateSaveAreaHeader->regHeader.sr_magic_offset;
if (ZE_RESULT_SUCCESS != readGpuMemory(memoryHandle, reinterpret_cast<char *>(&srIdent), sizeof(srIdent), gpuVa + srMagicOffset)) {
return false;
}
if (0 != strcmp(srIdent.magic, "srmagic")) {
PRINT_DEBUGGER_ERROR_LOG("readSystemRoutineIdent - Failed to read srMagic for thread %s\n", EuThread::toString(thread->getThreadId()).c_str());
return false;
}
return true;
}
void DebugSessionImp::markPendingInterruptsOrAddToNewlyStoppedFromRaisedAttention(EuThread::ThreadId threadId, uint64_t memoryHandle) {
SIP::sr_ident srMagic = {{0}};
srMagic.count = 0;
bool wasStopped = false;
{
std::unique_lock<std::mutex> lock(threadStateMutex);
if (!readSystemRoutineIdent(allThreads[threadId].get(), memoryHandle, srMagic)) {
PRINT_DEBUGGER_ERROR_LOG("Failed to read SR IDENT\n", "");
return;
} else {
PRINT_DEBUGGER_INFO_LOG("SIP version == %d.%d.%d\n", (int)srMagic.version.major, (int)srMagic.version.minor, (int)srMagic.version.patch);
}
wasStopped = allThreads[threadId]->isStopped();
if (!allThreads[threadId]->verifyStopped(srMagic.count)) {
return;
}
allThreads[threadId]->stopThread(memoryHandle);
}
bool threadWasInterrupted = false;
for (auto &request : pendingInterrupts) {
ze_device_thread_t apiThread = convertToApi(threadId);
auto isInterrupted = checkSingleThreadWithinDeviceThread(apiThread, request.first);
if (isInterrupted) {
// mark the pending interrupt as completed successfully only when the thread was newly stopped (not already stopped)
if (!wasStopped) {
request.second = true;
}
threadWasInterrupted = true;
}
}
if (!threadWasInterrupted && !wasStopped) {
newlyStoppedThreads.push_back(threadId);
}
}
void DebugSessionImp::generateEventsAndResumeStoppedThreads() {
if (interruptSent && !triggerEvents) {
auto timeDiff = getTimeDifferenceMilliseconds(interruptTime);
if (timeDiff > interruptTimeout) {
triggerEvents = true;
interruptTime = std::chrono::high_resolution_clock::now();
}
}
if (triggerEvents) {
std::vector<EuThread::ThreadId> resumeThreads;
std::vector<EuThread::ThreadId> stoppedThreadsToReport;
fillResumeAndStoppedThreadsFromNewlyStopped(resumeThreads, stoppedThreadsToReport);
resumeAccidentallyStoppedThreads(resumeThreads);
generateEventsForPendingInterrupts();
generateEventsForStoppedThreads(stoppedThreadsToReport);
interruptSent = false;
triggerEvents = false;
}
}
bool DebugSessionImp::isForceExceptionOrForceExternalHaltOnlyExceptionReason(uint32_t *cr0) {
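// cr0[1] bits 31:26 carry the exception reason; mask 0x44000000 covers the
// force exception and force external halt bits, i.e. stops induced by an
// interrupt rather than by a breakpoint or fault.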
const uint32_t cr0ExceptionBitmask = 0xFC000000;
const uint32_t cr0ForcedExceptionBitmask = 0x44000000;
return (((cr0[1] & cr0ExceptionBitmask) & (~cr0ForcedExceptionBitmask)) == 0);
}
void DebugSessionImp::fillResumeAndStoppedThreadsFromNewlyStopped(std::vector<EuThread::ThreadId> &resumeThreads, std::vector<EuThread::ThreadId> &stoppedThreadsToReport) {
if (newlyStoppedThreads.empty()) {
return;
}
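// Returning before getRegisterSize() below avoids reading the state save area
// header, which may not be resident until at least one thread has stopped.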
const auto regSize = std::max(getRegisterSize(ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU), 64u);
auto reg = std::make_unique<uint32_t[]>(regSize / sizeof(uint32_t));
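// Threads whose only exception reasons are force exception / force external
// halt were stopped by the interrupt itself and are resumed silently; anything
// else is reported as a stopped thread.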
for (auto &newlyStopped : newlyStoppedThreads) {
if (allThreads[newlyStopped]->isStopped()) {
memset(reg.get(), 0, regSize);
readRegistersImp(newlyStopped, ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU, 0, 1, reg.get());
if (isForceExceptionOrForceExternalHaltOnlyExceptionReason(reg.get())) {
PRINT_DEBUGGER_THREAD_LOG("RESUME accidentally stopped thread = %s\n", allThreads[newlyStopped]->toString().c_str());
resumeThreads.push_back(newlyStopped);
} else {
PRINT_DEBUGGER_THREAD_LOG("Newly stopped thread = %s, exception bits = %#010" PRIx32 "\n", allThreads[newlyStopped]->toString().c_str(), reg[1]);
stoppedThreadsToReport.push_back(newlyStopped);
}
}
}
newlyStoppedThreads.clear();
}
void DebugSessionImp::generateEventsForPendingInterrupts() {
zet_debug_event_t debugEvent = {};
for (auto &request : pendingInterrupts) {
if (request.second == true) {
debugEvent.type = ZET_DEBUG_EVENT_TYPE_THREAD_STOPPED;
debugEvent.info.thread.thread = request.first;
enqueueApiEvent(debugEvent);
} else {
debugEvent.type = ZET_DEBUG_EVENT_TYPE_THREAD_UNAVAILABLE;
debugEvent.info.thread.thread = request.first;
enqueueApiEvent(debugEvent);
}
}
{
std::unique_lock<std::mutex> lock(interruptMutex);
pendingInterrupts.clear();
}
}
void DebugSessionImp::resumeAccidentallyStoppedThreads(const std::vector<EuThread::ThreadId> &threadIds) {
std::vector<ze_device_thread_t> threads[4];
std::vector<EuThread::ThreadId> threadIdsPerDevice[4];
for (auto &threadID : threadIds) {
ze_device_thread_t thread = {static_cast<uint32_t>(threadID.slice), static_cast<uint32_t>(threadID.subslice), static_cast<uint32_t>(threadID.eu), static_cast<uint32_t>(threadID.thread)};
uint32_t deviceIndex = static_cast<uint32_t>(threadID.tileIndex);
UNRECOVERABLE_IF((connectedDevice->getNEODevice()->getNumSubDevices() > 0) &&
(deviceIndex >= connectedDevice->getNEODevice()->getNumSubDevices()));
threads[deviceIndex].push_back(thread);
threadIdsPerDevice[deviceIndex].push_back(threadID);
}
for (uint32_t i = 0; i < 4; i++) {
std::unique_lock<std::mutex> lock(threadStateMutex);
if (threadIdsPerDevice[i].size() > 0) {
[[maybe_unused]] auto writeSipCommandResult = writeResumeCommand(threadIdsPerDevice[i]);
DEBUG_BREAK_IF(writeSipCommandResult != true);
resumeImp(threadIdsPerDevice[i], i);
}
for (auto &threadID : threadIdsPerDevice[i]) {
while (checkThreadIsResumed(threadID) == false)
;
allThreads[threadID]->resumeThread();
}
}
}
void DebugSessionImp::generateEventsForStoppedThreads(const std::vector<EuThread::ThreadId> &threadIds) {
zet_debug_event_t debugEvent = {};
for (auto &threadID : threadIds) {
ze_device_thread_t thread = convertToApi(threadID);
debugEvent.type = ZET_DEBUG_EVENT_TYPE_THREAD_STOPPED;
debugEvent.info.thread.thread = thread;
enqueueApiEvent(debugEvent);
}
}
ze_result_t DebugSessionImp::readEvent(uint64_t timeout, zet_debug_event_t *outputEvent) {
if (outputEvent) {
outputEvent->type = ZET_DEBUG_EVENT_TYPE_INVALID;
outputEvent->flags = 0;
} else {
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
}
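// timeout == UINT64_MAX blocks until an event arrives or the async thread
// exits; any other non-zero timeout waits at most once for that many ms.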
do {
std::unique_lock<std::mutex> lock(asyncThreadMutex);
if (timeout > 0 && apiEvents.size() == 0) {
apiEventCondition.wait_for(lock, std::chrono::milliseconds(timeout));
}
if (apiEvents.size() > 0) {
*outputEvent = apiEvents.front();
apiEvents.pop();
return ZE_RESULT_SUCCESS;
}
} while (timeout == UINT64_MAX && asyncThread.threadActive);
return ZE_RESULT_NOT_READY;
}
void DebugSessionImp::validateAndSetStateSaveAreaHeader(uint64_t vmHandle, uint64_t gpuVa) {
auto headerSize = sizeof(SIP::StateSaveAreaHeader);
std::vector<char> data(headerSize);
auto retVal = readGpuMemory(vmHandle, data.data(), headerSize, gpuVa);
if (retVal != ZE_RESULT_SUCCESS) {
PRINT_DEBUGGER_ERROR_LOG("Reading Context State Save Area failed, error = %d\n", retVal);
return;
}
auto pStateSaveArea = reinterpret_cast<const SIP::StateSaveAreaHeader *>(data.data());
if (0 == strcmp(pStateSaveArea->versionHeader.magic, "tssarea")) {
size_t size = pStateSaveArea->versionHeader.size * 8u;
DEBUG_BREAK_IF(size != sizeof(SIP::StateSaveAreaHeader));
stateSaveAreaHeader.assign(data.begin(), data.begin() + size);
PRINT_DEBUGGER_INFO_LOG("Context State Save Area : version == %d.%d.%d\n", (int)pStateSaveArea->versionHeader.version.major, (int)pStateSaveArea->versionHeader.version.minor, (int)pStateSaveArea->versionHeader.version.patch);
slmSipVersionCheck();
} else {
PRINT_DEBUGGER_ERROR_LOG("Setting Context State Save Area: failed to match magic numbers\n", "");
}
}
void DebugSessionImp::slmSipVersionCheck() {
SIP::version sipVersion = getStateSaveAreaHeader()->versionHeader.version;
if ((sipVersion.major < minSlmSipVersion.major) ||
((sipVersion.major == minSlmSipVersion.major) && (sipVersion.minor < minSlmSipVersion.minor)) ||
((sipVersion.major == minSlmSipVersion.major) && (sipVersion.minor == minSlmSipVersion.minor) && (sipVersion.patch < minSlmSipVersion.patch))) {
sipSupportsSlm = false;
} else {
sipSupportsSlm = true;
}
}
const SIP::StateSaveAreaHeader *DebugSessionImp::getStateSaveAreaHeader() {
if (stateSaveAreaHeader.empty()) {
readStateSaveAreaHeader();
}
return reinterpret_cast<SIP::StateSaveAreaHeader *>(stateSaveAreaHeader.data());
}
const SIP::regset_desc *DebugSessionImp::getSbaRegsetDesc() {
// SBA virtual register set is always present
static const SIP::regset_desc sba = {0, ZET_DEBUG_SBA_COUNT_INTEL_GPU, 64, 8};
return &sba;
}
const SIP::regset_desc *DebugSessionImp::typeToRegsetDesc(uint32_t type) {
auto pStateSaveAreaHeader = getStateSaveAreaHeader();
if (pStateSaveAreaHeader == nullptr) {
DEBUG_BREAK_IF(pStateSaveAreaHeader == nullptr);
return nullptr;
}
switch (type) {
case ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.grf;
case ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.addr;
case ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.flag;
case ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.emask;
case ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.sr;
case ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.cr;
case ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.tdr;
case ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.acc;
case ZET_DEBUG_REGSET_TYPE_MME_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.mme;
case ZET_DEBUG_REGSET_TYPE_SP_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.sp;
case ZET_DEBUG_REGSET_TYPE_DBG_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.dbg;
case ZET_DEBUG_REGSET_TYPE_FC_INTEL_GPU:
return &pStateSaveAreaHeader->regHeader.fc;
case ZET_DEBUG_REGSET_TYPE_SBA_INTEL_GPU:
return DebugSessionImp::getSbaRegsetDesc();
default:
return nullptr;
}
}
uint32_t DebugSessionImp::getRegisterSize(uint32_t type) {
auto regset = typeToRegsetDesc(type);
if (regset) {
return regset->bytes;
}
return 0;
}
uint32_t DebugSessionImp::typeToRegsetFlags(uint32_t type) {
switch (type) {
case ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_SP_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_DBG_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_FC_INTEL_GPU:
return ZET_DEBUG_REGSET_FLAG_READABLE | ZET_DEBUG_REGSET_FLAG_WRITEABLE;
case ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU:
case ZET_DEBUG_REGSET_TYPE_SBA_INTEL_GPU:
return ZET_DEBUG_REGSET_FLAG_READABLE;
default:
return 0;
}
}
size_t DebugSessionImp::calculateThreadSlotOffset(EuThread::ThreadId threadId) {
auto pStateSaveAreaHeader = getStateSaveAreaHeader();
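// Slot layout: header (versionHeader.size is in 8-byte units), then the state
// area, indexed by the linearized (slice, subslice, eu, thread) position times
// the per-thread state save size.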
return pStateSaveAreaHeader->versionHeader.size * 8 + pStateSaveAreaHeader->regHeader.state_area_offset + ((((threadId.slice * pStateSaveAreaHeader->regHeader.num_subslices_per_slice + threadId.subslice) * pStateSaveAreaHeader->regHeader.num_eus_per_subslice + threadId.eu) * pStateSaveAreaHeader->regHeader.num_threads_per_eu + threadId.thread) * pStateSaveAreaHeader->regHeader.state_save_size);
}
size_t DebugSessionImp::calculateRegisterOffsetInThreadSlot(const SIP::regset_desc *regdesc, uint32_t start) {
return regdesc->offset + regdesc->bytes * start;
}
ze_result_t DebugSessionImp::readSbaRegisters(EuThread::ThreadId threadId, uint32_t start, uint32_t count, void *pRegisterValues) {
auto sbaRegDesc = DebugSessionImp::getSbaRegsetDesc();
if (start >= sbaRegDesc->num) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (start + count > sbaRegDesc->num) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
ze_result_t ret = ZE_RESULT_SUCCESS;
NEO::SbaTrackedAddresses sbaBuffer;
ret = readSbaBuffer(threadId, sbaBuffer);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
const auto &hwInfo = connectedDevice->getHwInfo();
const auto regSize = std::max(getRegisterSize(ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU), hwInfo.capabilityTable.grfSize);
auto r0 = std::make_unique<uint32_t[]>(regSize / sizeof(uint32_t));
ret = readRegistersImp(threadId, ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU, 0, 1, r0.get());
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
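// The binding table pointer is taken from r0 dword 4 (low 5 bits cleared,
// relative to surface state base); the scratch offset comes from the upper
// bits of r0 dword 5.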
uint64_t bindingTableBaseAddress = ((r0[4] >> 5) << 5) + sbaBuffer.SurfaceStateBaseAddress;
uint64_t scratchSpaceBaseAddress = 0;
auto &gfxCoreHelper = NEO::GfxCoreHelper::get(connectedDevice->getNEODevice()->getHardwareInfo().platform.eRenderCoreFamily);
if (gfxCoreHelper.isScratchSpaceSurfaceStateAccessible()) {
auto surfaceStateForScratch = ((r0[5] >> 10) << 6);
if (surfaceStateForScratch > 0) {
uint64_t renderSurfaceStateGpuVa = surfaceStateForScratch + sbaBuffer.SurfaceStateBaseAddress;
constexpr size_t renderSurfaceStateSize = 64;
std::vector<char> renderSurfaceState(renderSurfaceStateSize, 0);
ret = readGpuMemory(allThreads[threadId]->getMemoryHandle(), renderSurfaceState.data(), renderSurfaceStateSize, renderSurfaceStateGpuVa);
if (ret != ZE_RESULT_SUCCESS) {
return ret;
}
auto scratchSpacePTSize = gfxCoreHelper.getRenderSurfaceStatePitch(renderSurfaceState.data());
auto threadOffset = getPerThreadScratchOffset(scratchSpacePTSize, threadId);
auto gmmHelper = connectedDevice->getNEODevice()->getGmmHelper();
auto scratchAllocationBase = gmmHelper->decanonize(gfxCoreHelper.getRenderSurfaceStateBaseAddress(renderSurfaceState.data()));
if (scratchAllocationBase != 0) {
scratchSpaceBaseAddress = threadOffset + scratchAllocationBase;
}
}
} else {
auto scratchPointer = ((r0[5] >> 10) << 10);
if (scratchPointer != 0) {
scratchSpaceBaseAddress = scratchPointer + sbaBuffer.GeneralStateBaseAddress;
}
}
std::vector<uint64_t> packed;
packed.push_back(sbaBuffer.GeneralStateBaseAddress);
packed.push_back(sbaBuffer.SurfaceStateBaseAddress);
packed.push_back(sbaBuffer.DynamicStateBaseAddress);
packed.push_back(sbaBuffer.IndirectObjectBaseAddress);
packed.push_back(sbaBuffer.InstructionBaseAddress);
packed.push_back(sbaBuffer.BindlessSurfaceStateBaseAddress);
packed.push_back(sbaBuffer.BindlessSamplerStateBaseAddress);
packed.push_back(bindingTableBaseAddress);
packed.push_back(scratchSpaceBaseAddress);
size_t size = count * sbaRegDesc->bytes;
memcpy_s(pRegisterValues, size, &packed[start], size);
return ZE_RESULT_SUCCESS;
}
ze_result_t DebugSession::getRegisterSetProperties(Device *device, uint32_t *pCount, zet_debug_regset_properties_t *pRegisterSetProperties) {
if (nullptr == pCount) {
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
}
if (*pCount && !pRegisterSetProperties) {
return ZE_RESULT_ERROR_INVALID_NULL_POINTER;
}
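// Standard two-call idiom: a zero *pCount queries the register set count;
// otherwise up to *pCount descriptors are written out.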
auto &stateSaveAreaHeader = NEO::SipKernel::getBindlessDebugSipKernel(*device->getNEODevice()).getStateSaveAreaHeader();
if (stateSaveAreaHeader.size() == 0) {
*pCount = 0;
return ZE_RESULT_SUCCESS;
}
uint32_t totalRegsetNum = 0;
auto parseRegsetDesc = [&](const SIP::regset_desc &regsetDesc, zet_debug_regset_type_intel_gpu_t regsetType) {
if (regsetDesc.num) {
if (totalRegsetNum < *pCount) {
zet_debug_regset_properties_t regsetProps = {
ZET_STRUCTURE_TYPE_DEBUG_REGSET_PROPERTIES,
nullptr,
static_cast<uint32_t>(regsetType),
0,
DebugSessionImp::typeToRegsetFlags(regsetType),
0,
regsetDesc.num,
regsetDesc.bits,
regsetDesc.bytes,
};
pRegisterSetProperties[totalRegsetNum] = regsetProps;
}
++totalRegsetNum;
}
};
auto pStateSaveArea = reinterpret_cast<const SIP::StateSaveAreaHeader *>(stateSaveAreaHeader.data());
parseRegsetDesc(pStateSaveArea->regHeader.grf, ZET_DEBUG_REGSET_TYPE_GRF_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.addr, ZET_DEBUG_REGSET_TYPE_ADDR_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.flag, ZET_DEBUG_REGSET_TYPE_FLAG_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.emask, ZET_DEBUG_REGSET_TYPE_CE_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.sr, ZET_DEBUG_REGSET_TYPE_SR_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.cr, ZET_DEBUG_REGSET_TYPE_CR_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.tdr, ZET_DEBUG_REGSET_TYPE_TDR_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.acc, ZET_DEBUG_REGSET_TYPE_ACC_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.mme, ZET_DEBUG_REGSET_TYPE_MME_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.sp, ZET_DEBUG_REGSET_TYPE_SP_INTEL_GPU);
parseRegsetDesc(*DebugSessionImp::getSbaRegsetDesc(), ZET_DEBUG_REGSET_TYPE_SBA_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.dbg, ZET_DEBUG_REGSET_TYPE_DBG_INTEL_GPU);
parseRegsetDesc(pStateSaveArea->regHeader.fc, ZET_DEBUG_REGSET_TYPE_FC_INTEL_GPU);
if (!*pCount || (*pCount > totalRegsetNum)) {
*pCount = totalRegsetNum;
}
return ZE_RESULT_SUCCESS;
}
ze_result_t DebugSessionImp::registersAccessHelper(const EuThread *thread, const SIP::regset_desc *regdesc,
uint32_t start, uint32_t count, void *pRegisterValues, bool write) {
if (start >= regdesc->num) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (start + count > regdesc->num) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
auto gpuVa = getContextStateSaveAreaGpuVa(thread->getMemoryHandle());
if (gpuVa == 0) {
return ZE_RESULT_ERROR_UNKNOWN;
}
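// Validate the state save area ("tssarea") and the thread's slot ("srmagic")
// magic strings before touching any register data.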
char tssMagic[8] = {0};
readGpuMemory(thread->getMemoryHandle(), tssMagic, sizeof(tssMagic), gpuVa);
if (0 != strcmp(tssMagic, "tssarea")) {
return ZE_RESULT_ERROR_UNKNOWN;
}
auto threadSlotOffset = calculateThreadSlotOffset(thread->getThreadId());
SIP::sr_ident srMagic = {{0}};
auto srMagicOffset = threadSlotOffset + getStateSaveAreaHeader()->regHeader.sr_magic_offset;
readGpuMemory(thread->getMemoryHandle(), reinterpret_cast<char *>(&srMagic), sizeof(srMagic), gpuVa + srMagicOffset);
if (0 != strcmp(srMagic.magic, "srmagic")) {
return ZE_RESULT_ERROR_UNKNOWN;
}
auto startRegOffset = threadSlotOffset + calculateRegisterOffsetInThreadSlot(regdesc, start);
int ret = 0;
if (write) {
ret = writeGpuMemory(thread->getMemoryHandle(), static_cast<const char *>(pRegisterValues), count * regdesc->bytes, gpuVa + startRegOffset);
} else {
ret = readGpuMemory(thread->getMemoryHandle(), static_cast<char *>(pRegisterValues), count * regdesc->bytes, gpuVa + startRegOffset);
}
return ret == 0 ? ZE_RESULT_SUCCESS : ZE_RESULT_ERROR_UNKNOWN;
}
ze_result_t DebugSessionImp::cmdRegisterAccessHelper(const EuThread::ThreadId &threadId, SIP::sip_command &command, bool write) {
auto stateSaveAreaHeader = getStateSaveAreaHeader();
auto *regdesc = &stateSaveAreaHeader->regHeader.cmd;
PRINT_DEBUGGER_INFO_LOG("Access CMD %d for thread %s\n", command.command, EuThread::toString(threadId).c_str());
ze_result_t result = registersAccessHelper(allThreads[threadId].get(), regdesc, 0, 1, &command, write);
if (result != ZE_RESULT_SUCCESS) {
PRINT_DEBUGGER_ERROR_LOG("Failed to access CMD for thread %s\n", EuThread::toString(threadId).c_str());
}
return result;
}
ze_result_t DebugSessionImp::readRegisters(ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) {
if (!isSingleThread(thread)) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
auto threadId = convertToThreadId(thread);
if (!allThreads[threadId]->isStopped()) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
auto stateSaveAreaHeader = getStateSaveAreaHeader();
if (stateSaveAreaHeader == nullptr) {
return ZE_RESULT_ERROR_UNKNOWN;
}
if (type == ZET_DEBUG_REGSET_TYPE_SBA_INTEL_GPU) {
return readSbaRegisters(threadId, start, count, pRegisterValues);
}
return readRegistersImp(threadId, type, start, count, pRegisterValues);
}
ze_result_t DebugSessionImp::readRegistersImp(EuThread::ThreadId threadId, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) {
auto regdesc = typeToRegsetDesc(type);
if (nullptr == regdesc) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
return registersAccessHelper(allThreads[threadId].get(), regdesc, start, count, pRegisterValues, false);
}
ze_result_t DebugSessionImp::writeRegisters(ze_device_thread_t thread, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) {
if (!isSingleThread(thread)) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
auto threadId = convertToThreadId(thread);
if (!allThreads[threadId]->isStopped()) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
auto stateSaveAreaHeader = getStateSaveAreaHeader();
if (stateSaveAreaHeader == nullptr) {
return ZE_RESULT_ERROR_UNKNOWN;
}
return writeRegistersImp(threadId, type, start, count, pRegisterValues);
}
ze_result_t DebugSessionImp::writeRegistersImp(EuThread::ThreadId threadId, uint32_t type, uint32_t start, uint32_t count, void *pRegisterValues) {
auto regdesc = typeToRegsetDesc(type);
if (nullptr == regdesc) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
if (ZET_DEBUG_REGSET_FLAG_WRITEABLE != ((DebugSessionImp::typeToRegsetFlags(type) & ZET_DEBUG_REGSET_FLAG_WRITEABLE))) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
return registersAccessHelper(allThreads[threadId].get(), regdesc, start, count, pRegisterValues, true);
}
bool DebugSessionImp::isValidGpuAddress(const zet_debug_memory_space_desc_t *desc) const {
if (desc->type == ZET_DEBUG_MEMORY_SPACE_TYPE_DEFAULT) {
auto gmmHelper = connectedDevice->getNEODevice()->getGmmHelper();
auto decanonizedAddress = gmmHelper->decanonize(desc->address);
bool validAddress = gmmHelper->isValidCanonicalGpuAddress(desc->address);
if (desc->address == decanonizedAddress || validAddress) {
return true;
}
} else if (desc->type == ZET_DEBUG_MEMORY_SPACE_TYPE_SLM) {
if (desc->address & (1 << slmAddressSpaceTag)) { // IGC sets bit 28 to identify SLM address
return true;
}
}
return false;
}
ze_result_t DebugSessionImp::validateThreadAndDescForMemoryAccess(ze_device_thread_t thread, const zet_debug_memory_space_desc_t *desc) {
if (!isValidGpuAddress(desc)) {
return ZE_RESULT_ERROR_INVALID_ARGUMENT;
}
ze_result_t status = sanityMemAccessThreadCheck(thread, desc);
if (status != ZE_RESULT_SUCCESS) {
return status;
}
return ZE_RESULT_SUCCESS;
}
ze_result_t DebugSessionImp::waitForCmdReady(EuThread::ThreadId threadId, uint16_t retryCount) {
ze_result_t status;
SIP::sip_command sipCommand = {0};
for (uint16_t attempts = 0; attempts < retryCount; attempts++) {
status = cmdRegisterAccessHelper(threadId, sipCommand, false);
if (status != ZE_RESULT_SUCCESS) {
return status;
}
if (sipCommand.command == static_cast<uint32_t>(NEO::SipKernel::COMMAND::READY)) {
break;
}
NEO::sleep(std::chrono::microseconds(100));
}
if (sipCommand.command != static_cast<uint32_t>(NEO::SipKernel::COMMAND::READY)) {
return ZE_RESULT_ERROR_NOT_AVAILABLE;
}
return ZE_RESULT_SUCCESS;
}
} // namespace L0