feature: add debugger functionality to global stateless feature

Related-To: NEO-10381

Signed-off-by: Zbigniew Zdanowicz <zbigniew.zdanowicz@intel.com>
This commit is contained in:
Zbigniew Zdanowicz
2024-03-26 11:32:20 +00:00
committed by Compute-Runtime-Automation
parent 562d6f114b
commit 00445d9285
5 changed files with 106 additions and 1 deletions

View File

@@ -381,7 +381,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) {
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper() && this->cmdListHeapAddressModel == NEO::HeapAddressModel::privateHeaps) {
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::surfaceState);
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;

View File

@@ -233,6 +233,7 @@ struct CommandQueueHw : public CommandQueueImp {
NEO::LinearStream &commandStream,
CommandListRequiredStateChange &cmdListRequired);
inline void updateBaseAddressState(CommandList *lastCommandList);
inline void updateDebugSurfaceState(CommandListExecutionContext &ctx);
size_t alignedChildStreamPadding{};
};

View File

@@ -10,6 +10,7 @@
#include "shared/source/built_ins/built_ins.h"
#include "shared/source/built_ins/sip.h"
#include "shared/source/command_container/command_encoder.h"
#include "shared/source/command_container/encode_surface_state.h"
#include "shared/source/command_stream/command_stream_receiver_hw.h"
#include "shared/source/command_stream/linear_stream.h"
#include "shared/source/command_stream/preemption.h"
@@ -42,6 +43,8 @@
#include "level_zero/core/source/fence/fence.h"
#include "level_zero/core/source/helpers/error_code_helper_l0.h"
#include "encode_surface_state_args.h"
#include <algorithm>
#include <limits>
@@ -153,6 +156,8 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandListsRegular(
return ret;
}
this->updateDebugSurfaceState(ctx);
this->getGlobalFenceAndMakeItResident();
this->getWorkPartitionAndMakeItResident();
this->getGlobalStatelessHeapAndMakeItResident();
@@ -1427,6 +1432,31 @@ void CommandQueueHw<gfxCoreFamily>::updateBaseAddressState(CommandList *lastComm
csrHw->getIohState().updateAndCheck(ioh);
}
// Writes the debug surface's RENDER_SURFACE_STATE into the CSR global stateless heap.
//
// The update is performed only when all of the following hold:
//  - this queue uses the globalStateless heap address model,
//  - ctx reports the NEO debugger as active for this device,
//  - ctx.gsbaStateDirty is set.
// Otherwise the function returns without touching the heap.
template <GFXCORE_FAMILY gfxCoreFamily>
void CommandQueueHw<gfxCoreFamily>::updateDebugSurfaceState(CommandListExecutionContext &ctx) {
    if (this->cmdListHeapAddressModel != NEO::HeapAddressModel::globalStateless) {
        return;
    }
    if (!ctx.isNEODebuggerActive(this->device) || !ctx.gsbaStateDirty) {
        return;
    }

    using RenderSurfaceState = typename GfxFamily::RENDER_SURFACE_STATE;

    // Destination: the slot the debugger hands back for the debug surface state in the global stateless heap.
    auto statelessHeap = this->csr->getGlobalStatelessHeap();
    auto neoDevice = this->device->getNEODevice();
    auto reservedSlot = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*statelessHeap);

    auto debugSurface = this->device->getDebugSurface();

    // The state is encoded into a local copy first and copied into the heap slot afterwards.
    auto encodedState = GfxFamily::cmdInitRenderSurfaceState;

    NEO::EncodeSurfaceStateArgs encodeArgs;
    encodeArgs.outMemory = &encodedState;
    encodeArgs.graphicsAddress = debugSurface->getGpuAddress();
    encodeArgs.size = debugSurface->getUnderlyingBufferSize();
    encodeArgs.mocs = this->device->getMOCS(false, false);
    encodeArgs.numAvailableDevices = neoDevice->getNumGenericSubDevices();
    encodeArgs.allocation = debugSurface;
    encodeArgs.gmmHelper = neoDevice->getGmmHelper();
    encodeArgs.areMultipleSubDevicesInContext = false;
    encodeArgs.isDebuggerActive = true;
    NEO::EncodeSurfaceState<GfxFamily>::encodeBuffer(encodeArgs);

    *reinterpret_cast<RenderSurfaceState *>(reservedSlot) = encodedState;
}
template <GFXCORE_FAMILY gfxCoreFamily>
size_t CommandQueueHw<gfxCoreFamily>::estimateStateBaseAddressCmdSizeForMultipleCommandLists(bool &baseAddressStateDirty,
NEO::HeapAddressModel commandListHeapAddressModel,

View File

@@ -133,5 +133,14 @@ struct L0DebuggerHwParameterizedFixture : ::testing::TestWithParam<int>, public
DebugManagerStateRestore restorer;
};
// Debugger fixture variant that sets the SelectCmdListHeapAddressModel debug flag
// before delegating to the base L0DebuggerHwFixture setup.
struct L0DebuggerHwGlobalStatelessFixture : public L0DebuggerHwFixture {
    // NOTE(review): presumably restores the debug manager flags when the fixture is destroyed - confirm.
    DebugManagerStateRestore restorer;

    void setUp() {
        // NOTE(review): value 1 presumably selects the global stateless heap model (per the fixture name) - confirm.
        constexpr int selectedHeapAddressModel = 1;
        // The flag is written before the base setUp() runs.
        NEO::debugManager.flags.SelectCmdListHeapAddressModel.set(selectedHeapAddressModel);
        L0DebuggerHwFixture::setUp();
    }
};
} // namespace ult
} // namespace L0

View File

@@ -26,6 +26,7 @@
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h"
namespace L0 {
@@ -33,6 +34,7 @@ namespace ult {
using L0DebuggerTest = Test<L0DebuggerHwFixture>;
using L0DebuggerParameterizedTests = L0DebuggerHwParameterizedFixture;
using L0DebuggerGlobalStatelessTest = Test<L0DebuggerHwGlobalStatelessFixture>;
TEST_F(L0DebuggerTest, givenL0DebuggerWhenGettingL0DebuggerThenValidDebuggerInstanceIsReturned) {
EXPECT_NE(nullptr, device->getL0Debugger());
@@ -865,5 +867,68 @@ HWTEST_F(DebuggerWithGlobalBindlessTest, GivenGlobalBindlessHeapWhenAppendingToI
commandList->destroy();
}
// Executes the same command list twice on a queue created with the fixture's heap model and checks:
//  1. after the first execution, the RENDER_SURFACE_STATE at the CPU base of the CSR global
//     stateless heap describes the device debug surface (address, MOCS, size);
//  2. after zeroing that state and executing again, the state is still zero, i.e. the queue
//     does not write the debug surface state a second time.
HWTEST2_F(L0DebuggerGlobalStatelessTest,
          givenGlobalStatelessWhenCmdListExecutedOnQueueThenQueueDispatchesSurfaceStateOnceToGlobalStatelessHeap,
          IsAtLeastXeHpCore) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    auto csr = neoDevice->getDefaultEngine().commandStreamReceiver;

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, csr, &queueDesc, false, false, false, returnValue));

    // ASSERT_* returns from the test body on failure, which would skip a trailing destroy() call.
    // The guard releases the queue on every exit path; a null queue is simply ignored by unique_ptr.
    auto destroyQueue = [](::L0::CommandQueue *queue) { queue->destroy(); };
    std::unique_ptr<::L0::CommandQueue, decltype(destroyQueue)> queueGuard(commandQueue, destroyQueue);
    ASSERT_NE(nullptr, commandQueue);

    std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::compute, 0u, returnValue, false));
    // Fail cleanly instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());
    auto cmdListHandle = commandList->toHandle();

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    Mock<Module> module(device, nullptr, ModuleType::user);
    Mock<::L0::KernelImp> kernel;
    kernel.module = &module;

    auto statelessSurfaceHeap = csr->getGlobalStatelessHeap();
    ASSERT_NE(nullptr, statelessSurfaceHeap);

    returnValue = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);
    commandList->close();

    // First execution: the debug surface state is expected at the base of the global stateless heap.
    returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    auto debugSurfaceState = reinterpret_cast<RENDER_SURFACE_STATE *>(statelessSurfaceHeap->getCpuBase());
    ASSERT_NE(debugSurfaceState, nullptr);

    auto debugSurface = static_cast<::L0::DeviceImp *>(device)->getDebugSurface();
    ASSERT_NE(debugSurface, nullptr);
    ASSERT_EQ(debugSurface->getGpuAddress(), debugSurfaceState->getSurfaceBaseAddress());

    const auto mocsNoCache = device->getNEODevice()->getGmmHelper()->getMOCS(GMM_RESOURCE_USAGE_OCL_BUFFER_CACHELINE_MISALIGNED);
    const auto actualMocs = debugSurfaceState->getMemoryObjectControlState();
    EXPECT_EQ(actualMocs, mocsNoCache);

    // The buffer length (size - 1) is split across the width/height/depth fields, each stored minus one.
    SurfaceStateBufferLength length;
    length.length = static_cast<uint32_t>(debugSurface->getUnderlyingBufferSize() - 1);
    EXPECT_EQ(length.surfaceState.depth + 1u, debugSurfaceState->getDepth());
    EXPECT_EQ(length.surfaceState.width + 1u, debugSurfaceState->getWidth());
    EXPECT_EQ(length.surfaceState.height + 1u, debugSurfaceState->getHeight());

    // Second execution: wipe the state and verify the queue leaves it untouched.
    memset(debugSurfaceState, 0, sizeof(RENDER_SURFACE_STATE));

    returnValue = commandQueue->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr, 0, nullptr);
    EXPECT_EQ(ZE_RESULT_SUCCESS, returnValue);

    char zeroBuffer[sizeof(RENDER_SURFACE_STATE)];
    memset(zeroBuffer, 0, sizeof(RENDER_SURFACE_STATE));

    auto memCmpResult = memcmp(debugSurfaceState, zeroBuffer, sizeof(RENDER_SURFACE_STATE));
    EXPECT_EQ(0, memCmpResult);

    // Destroy the queue here, before the command list, kernel and module go out of scope.
    queueGuard.reset();
}
} // namespace ult
} // namespace L0