performance(debugger): optimize ATT handling - minimize mem allocs

- do not allocate the state save area every time an attention event
is handled
- keep allocated memory for subsequent events
- remove unneeded DEBUG_BREAK_IF

Related-To: NEO-8183

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe 2023-07-17 13:44:19 +00:00 committed by Compute-Runtime-Automation
parent 6d458cd002
commit 8e07dd30cb
5 changed files with 54 additions and 12 deletions

View File

@ -147,6 +147,12 @@ struct DebugSessionImp : DebugSession {
return timeDifferenceMs;
}
// Makes sure the stateSaveAreaMemory buffer holds at least `size` elements.
// The buffer is only ever grown here: when it is already large enough the
// call returns without touching it, so the existing storage is kept.
void allocateStateSaveAreaMemory(size_t size) {
    const bool alreadyLargeEnough = stateSaveAreaMemory.size() >= size;
    if (alreadyLargeEnough) {
        return;
    }
    stateSaveAreaMemory.resize(size);
}
std::chrono::high_resolution_clock::time_point interruptTime;
std::atomic<bool> interruptSent = false;
std::atomic<bool> triggerEvents = false;
@ -161,6 +167,7 @@ struct DebugSessionImp : DebugSession {
std::vector<char> stateSaveAreaHeader;
SIP::version minSlmSipVersion = {2, 1, 0};
bool sipSupportsSlm = false;
// Buffer the attention-event handlers read the context state save area into.
// allocateStateSaveAreaMemory() only grows it, so the storage is reused
// by later events instead of being allocated on every event.
std::vector<char> stateSaveAreaMemory;
std::vector<std::pair<DebugSessionImp *, bool>> tileSessions; // DebugSession, attached
bool tileAttachEnabled = false;

View File

@ -1312,8 +1312,6 @@ void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_atte
if (threadsWithAttention.size() > 0) {
auto gpuVa = getContextStateSaveAreaGpuVa(vmHandle);
auto stateSaveAreaSize = getContextStateSaveAreaSize(vmHandle);
std::unique_ptr<char[]> stateSaveArea = nullptr;
auto stateSaveReadResult = ZE_RESULT_ERROR_UNKNOWN;
std::unique_lock<std::mutex> lock;
@ -1330,8 +1328,8 @@ void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_atte
getNotStoppedThreads(threadsWithAttention, newThreads);
if (newThreads.size() > 0) {
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(vmHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
allocateStateSaveAreaMemory(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(vmHandle, stateSaveAreaMemory.data(), stateSaveAreaSize, gpuVa);
}
} else {
PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", "");
@ -1343,9 +1341,9 @@ void DebugSessionLinux::handleAttentionEvent(prelim_drm_i915_debug_event_eu_atte
PRINT_DEBUGGER_THREAD_LOG("ATTENTION event for thread: %s\n", EuThread::toString(threadId).c_str());
if (tileSessionsEnabled) {
static_cast<TileDebugSessionLinux *>(tileSessions[tileIndex].first)->addThreadToNewlyStoppedFromRaisedAttention(threadId, vmHandle, stateSaveArea.get());
static_cast<TileDebugSessionLinux *>(tileSessions[tileIndex].first)->addThreadToNewlyStoppedFromRaisedAttention(threadId, vmHandle, stateSaveAreaMemory.data());
} else {
addThreadToNewlyStoppedFromRaisedAttention(threadId, vmHandle, stateSaveArea.get());
addThreadToNewlyStoppedFromRaisedAttention(threadId, vmHandle, stateSaveAreaMemory.data());
}
}
}
@ -1514,7 +1512,6 @@ void DebugSessionLinux::checkStoppedThreadsAndGenerateEvents(const std::vector<E
std::unique_ptr<uint8_t[]> bitmask;
size_t bitmaskSize;
[[maybe_unused]] auto attReadResult = threadControl(threads, deviceIndex, ThreadControlCmd::Stopped, bitmask, bitmaskSize);
DEBUG_BREAK_IF(attReadResult != 0);
// error querying STOPPED threads - no threads available ( for example: threads have completed )
if (attReadResult != 0) {

View File

@ -265,8 +265,6 @@ ze_result_t DebugSessionWindows::handleEuAttentionBitsEvent(DBGUMD_READ_EVENT_EU
auto gpuVa = getContextStateSaveAreaGpuVa(memoryHandle);
auto stateSaveAreaSize = getContextStateSaveAreaSize(memoryHandle);
std::unique_ptr<char[]> stateSaveArea = nullptr;
auto stateSaveReadResult = ZE_RESULT_ERROR_UNKNOWN;
std::unique_lock<std::mutex> lock(threadStateMutex);
@ -275,8 +273,8 @@ ze_result_t DebugSessionWindows::handleEuAttentionBitsEvent(DBGUMD_READ_EVENT_EU
std::vector<EuThread::ThreadId> newThreads;
getNotStoppedThreads(threadsWithAttention, newThreads);
if (newThreads.size() > 0) {
stateSaveArea = std::make_unique<char[]>(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveArea.get(), stateSaveAreaSize, gpuVa);
allocateStateSaveAreaMemory(stateSaveAreaSize);
stateSaveReadResult = readGpuMemory(memoryHandle, stateSaveAreaMemory.data(), stateSaveAreaSize, gpuVa);
}
} else {
PRINT_DEBUGGER_ERROR_LOG("Context state save area bind info invalid\n", "");
@ -287,7 +285,7 @@ ze_result_t DebugSessionWindows::handleEuAttentionBitsEvent(DBGUMD_READ_EVENT_EU
for (auto &threadId : threadsWithAttention) {
PRINT_DEBUGGER_THREAD_LOG("ATTENTION event for thread: %s\n", EuThread::toString(threadId).c_str());
addThreadToNewlyStoppedFromRaisedAttention(threadId, memoryHandle, stateSaveArea.get());
addThreadToNewlyStoppedFromRaisedAttention(threadId, memoryHandle, stateSaveAreaMemory.data());
}
}
}

View File

@ -1783,6 +1783,44 @@ TEST(DebugSessionTest, givenStoppedThreadWhenGettingNotStoppedThreadsThenOnlyRun
EXPECT_EQ(thread1, newStops[0]);
}
// Checks that the state save area buffer starts empty and grows each time
// a size larger than the current one is requested.
TEST(DebugSessionTest, givenSizeBiggerThanPreviousWhenAllocatingStateSaveAreaMemoryThenNewMemoryIsAllocated) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;

    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0);
    Mock<L0::DeviceImp> deviceImp(neoDevice, neoDevice->getExecutionEnvironment());

    auto session = std::make_unique<MockDebugSession>(config, &deviceImp);
    auto &buffer = session->stateSaveAreaMemory;

    // Nothing is allocated before the first request.
    EXPECT_EQ(0u, buffer.size());

    // First request sizes the buffer.
    session->allocateStateSaveAreaMemory(0x1000);
    EXPECT_EQ(0x1000u, buffer.size());

    // A bigger request grows it.
    session->allocateStateSaveAreaMemory(0x2000);
    EXPECT_EQ(0x2000u, buffer.size());
}
// Checks that asking for the size the buffer already has leaves the
// underlying storage pointer unchanged.
TEST(DebugSessionTest, givenTheSameSizeWhenAllocatingStateSaveAreaMemoryThenNewMemoryIsNotAllocated) {
    zet_debug_config_t config = {};
    config.pid = 0x1234;

    auto hwInfo = *NEO::defaultHwInfo.get();
    NEO::MockDevice *neoDevice = NEO::MockDevice::createWithNewExecutionEnvironment<NEO::MockDevice>(&hwInfo, 0);
    Mock<L0::DeviceImp> deviceImp(neoDevice, neoDevice->getExecutionEnvironment());

    auto session = std::make_unique<MockDebugSession>(config, &deviceImp);
    auto &buffer = session->stateSaveAreaMemory;

    // Nothing is allocated before the first request.
    EXPECT_EQ(0u, buffer.size());

    session->allocateStateSaveAreaMemory(0x1000);
    EXPECT_EQ(0x1000u, buffer.size());

    // Remember where the storage lives, then repeat the identical request.
    auto previousData = buffer.data();
    session->allocateStateSaveAreaMemory(0x1000);
    EXPECT_EQ(previousData, buffer.data());
}
using MultiTileDebugSessionTest = Test<MultipleDevicesWithCustomHwInfo>;
TEST_F(MultiTileDebugSessionTest, givenThreadsFromMultipleTilesWhenResumeCalledThenThreadsResumedInAllTiles) {

View File

@ -141,6 +141,7 @@ struct MockDebugSession : public L0::DebugSessionImp {
using L0::DebugSession::debugArea;
using L0::DebugSessionImp::addThreadToNewlyStoppedFromRaisedAttention;
using L0::DebugSessionImp::allocateStateSaveAreaMemory;
using L0::DebugSessionImp::apiEvents;
using L0::DebugSessionImp::applyResumeWa;
using L0::DebugSessionImp::calculateThreadSlotOffset;
@ -156,6 +157,7 @@ struct MockDebugSession : public L0::DebugSessionImp {
using L0::DebugSessionImp::registersAccessHelper;
using L0::DebugSessionImp::resumeAccidentallyStoppedThreads;
using L0::DebugSessionImp::sendInterrupts;
using L0::DebugSessionImp::stateSaveAreaMemory;
using L0::DebugSessionImp::typeToRegsetDesc;
using L0::DebugSessionImp::validateAndSetStateSaveAreaHeader;