feature: bindless global heap with debugger

- program debugSurface's SurfaceState at the beginning of Bindless Surface
State Heap - SPECIAL_SSH
- ensure SPECIAL_SSH is resident

Related-To: NEO-7063

Signed-off-by: Hoppe, Mateusz <mateusz.hoppe@intel.com>
This commit is contained in:
Hoppe, Mateusz
2023-09-28 19:33:44 +00:00
committed by Compute-Runtime-Automation
parent ba4c06811f
commit 5c565efe28
10 changed files with 181 additions and 13 deletions

View File

@@ -158,10 +158,13 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushImmedia
if (this->device->getL0Debugger()) {
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
this->csr->makeResident(*this->device->getDebugSurface());
if (this->device->getNEODevice()->getBindlessHeapsHelper()) {
this->csr->makeResident(*this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::SPECIAL_SSH)->getGraphicsAllocation());
}
}
NEO::Device *neoDevice = this->device->getNEODevice();
if (neoDevice->getDebugger()) {
if (neoDevice->getDebugger() && !neoDevice->getBindlessHeapsHelper()) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
auto &sshState = csrHw->getSshState();
bool sshDirty = sshState.updateAndCheck(ssh);
@@ -270,10 +273,13 @@ NEO::CompletionStamp CommandListCoreFamilyImmediate<gfxCoreFamily>::flushRegular
UNRECOVERABLE_IF(!NEO::Debugger::isDebugEnabled(this->internalUsage));
this->csr->makeResident(*this->device->getL0Debugger()->getSbaTrackingBuffer(this->csr->getOsContext().getContextId()));
this->csr->makeResident(*this->device->getDebugSurface());
if (this->device->getNEODevice()->getBindlessHeapsHelper()) {
this->csr->makeResident(*this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::SPECIAL_SSH)->getGraphicsAllocation());
}
}
NEO::Device *neoDevice = this->device->getNEODevice();
if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing) {
if (neoDevice->getDebugger() && this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) {
auto csrHw = static_cast<NEO::CommandStreamReceiverHw<GfxFamily> *>(this->csr);
auto sshStateCopy = csrHw->getSshState();
bool sshDirty = sshStateCopy.updateAndCheck(ssh);

View File

@@ -220,7 +220,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
this->containsStatelessUncachedResource = dispatchKernelArgs.requiresUncachedMocs;
}
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) {
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;

View File

@@ -338,7 +338,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
}
}
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing) {
if (neoDevice->getDebugger() && !this->immediateCmdListHeapSharing && !neoDevice->getBindlessHeapsHelper()) {
auto *ssh = commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE);
auto surfaceStateSpace = neoDevice->getDebugger()->getDebugSurfaceReservedSurfaceState(*ssh);
auto surfaceState = GfxFamily::cmdInitRenderSurfaceState;

View File

@@ -913,6 +913,9 @@ void CommandQueueHw<gfxCoreFamily>::makeDebugSurfaceResidentIfNEODebuggerActive(
}
UNRECOVERABLE_IF(this->device->getDebugSurface() == nullptr);
this->csr->makeResident(*this->device->getDebugSurface());
if (this->device->getNEODevice()->getBindlessHeapsHelper()) {
this->csr->makeResident(*this->device->getNEODevice()->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::SPECIAL_SSH)->getGraphicsAllocation());
}
}
template <GFXCORE_FAMILY gfxCoreFamily>

View File

@@ -18,6 +18,7 @@
#include "shared/source/execution_environment/execution_environment.h"
#include "shared/source/execution_environment/root_device_environment.h"
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/bindless_heaps_helper.h"
#include "shared/source/helpers/compiler_product_helper.h"
#include "shared/source/helpers/constants.h"
#include "shared/source/helpers/engine_node_helper.h"
@@ -25,6 +26,7 @@
#include "shared/source/helpers/ray_tracing_helper.h"
#include "shared/source/helpers/string.h"
#include "shared/source/helpers/topology_map.h"
#include "shared/source/indirect_heap/indirect_heap.h"
#include "shared/source/kernel/grf_config.h"
#include "shared/source/memory_manager/allocation_properties.h"
#include "shared/source/memory_manager/allocations_list.h"
@@ -56,6 +58,8 @@
#include "level_zero/tools/source/metrics/metric.h"
#include "level_zero/tools/source/sysman/sysman.h"
#include "encode_surface_state_args.h"
#include <algorithm>
namespace NEO {
@@ -1243,6 +1247,22 @@ Device *Device::create(DriverHandle *driverHandle, NEO::Device *neoDevice, bool
NEO::MemoryTransferHelper::transferMemoryToAllocation(productHelper.isBlitCopyRequiredForLocalMemory(rootDeviceEnvironment, *debugSurface),
*neoDevice, debugSurface, 0, stateSaveAreaHeader.data(),
stateSaveAreaHeader.size());
if (neoDevice->getBindlessHeapsHelper()) {
auto &gfxCoreHelper = neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[debugSurface->getRootDeviceIndex()]->getHelper<NEO::GfxCoreHelper>();
auto ssh = neoDevice->getBindlessHeapsHelper()->getHeap(NEO::BindlessHeapsHelper::SPECIAL_SSH)->getCpuBase();
NEO::EncodeSurfaceStateArgs args;
args.outMemory = ssh;
args.graphicsAddress = device->getDebugSurface()->getGpuAddress();
args.size = device->getDebugSurface()->getUnderlyingBufferSize();
args.mocs = device->getMOCS(false, false);
args.numAvailableDevices = neoDevice->getNumGenericSubDevices();
args.allocation = device->getDebugSurface();
args.gmmHelper = neoDevice->getGmmHelper();
args.useGlobalAtomics = false;
args.areMultipleSubDevicesInContext = neoDevice->getNumGenericSubDevices() > 1;
args.isDebuggerActive = true;
gfxCoreHelper.encodeBufferSurfaceState(args);
}
}
for (auto &neoSubDevice : neoDevice->getSubDevices()) {

View File

@@ -230,6 +230,8 @@ ze_result_t DriverHandleImp::initialize(std::vector<std::unique_ptr<NEO::Device>
auto osInterface = device->getNEODevice()->getRootDeviceEnvironment().osInterface.get();
if (osInterface && !osInterface->isDebugAttachAvailable() && enableProgramDebugging != NEO::DebuggingMode::Disabled) {
NEO::printDebugString(NEO::DebugManager.flags.PrintDebugMessages.get(), stderr,
"Debug mode is not enabled in the system.\n");
return ZE_RESULT_ERROR_DEPENDENCY_UNAVAILABLE;
}

View File

@@ -24,6 +24,9 @@ namespace ult {
struct L0DebuggerFixture {
void setUp() {
setUp(true);
}
void setUp(bool createDriver) {
auto executionEnvironment = new NEO::ExecutionEnvironment();
auto mockBuiltIns = new NEO::MockBuiltins();
@@ -48,13 +51,15 @@ struct L0DebuggerFixture {
neoDevice = NEO::MockDevice::create<NEO::MockDevice>(executionEnvironment, 0u);
NEO::DeviceVector devices;
devices.push_back(std::unique_ptr<NEO::Device>(neoDevice));
driverHandle = std::make_unique<Mock<L0::DriverHandleImp>>();
driverHandle->enableProgramDebugging = NEO::DebuggingMode::Online;
if (createDriver) {
NEO::DeviceVector devices;
devices.push_back(std::unique_ptr<NEO::Device>(neoDevice));
driverHandle = std::make_unique<Mock<L0::DriverHandleImp>>();
driverHandle->enableProgramDebugging = NEO::DebuggingMode::Online;
driverHandle->initialize(std::move(devices));
device = driverHandle->devices[0];
driverHandle->initialize(std::move(devices));
device = driverHandle->devices[0];
}
}
void tearDown() {

View File

@@ -12,6 +12,7 @@
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/helpers/mock_product_helper_hw.h"
#include "shared/test/common/helpers/raii_product_helper.h"
#include "shared/test/common/mocks/mock_bindless_heaps_helper.h"
#include "shared/test/common/mocks/mock_gmm_helper.h"
#include "shared/test/common/mocks/mock_graphics_allocation.h"
#include "shared/test/common/test_macros/hw_test.h"
@@ -739,6 +740,131 @@ HWTEST2_F(L0DebuggerTest, givenImmediateFlushTaskWhenAppendingKernelUsingNewHeap
kernelImmData->isaGraphicsAllocation.reset(nullptr);
commandList->destroy();
}
struct DebuggerWithGlobalBindlessFixture : public L0DebuggerFixture {
void setUp() {
NEO::DebugManager.flags.UseExternalAllocatorForSshAndDsh.set(true);
L0DebuggerFixture::setUp(false);
auto mockHelper = std::make_unique<MockBindlesHeapsHelper>(neoDevice->getMemoryManager(),
neoDevice->getNumGenericSubDevices() > 1,
neoDevice->getRootDeviceIndex(),
neoDevice->getDeviceBitfield());
mockHelper->globalBindlessDsh = false;
bindlessHelper = mockHelper.get();
neoDevice->getExecutionEnvironment()->rootDeviceEnvironments[neoDevice->getRootDeviceIndex()]->bindlessHeapsHelper.reset(mockHelper.release());
NEO::DeviceVector devices;
devices.push_back(std::unique_ptr<NEO::Device>(neoDevice));
driverHandle = std::make_unique<Mock<L0::DriverHandleImp>>();
driverHandle->enableProgramDebugging = NEO::DebuggingMode::Online;
driverHandle->initialize(std::move(devices));
device = driverHandle->devices[0];
}
void tearDown() {
L0DebuggerFixture::tearDown();
}
DebugManagerStateRestore restorer;
MockBindlesHeapsHelper *bindlessHelper = nullptr;
};
using DebuggerWithGlobalBindlessTest = Test<DebuggerWithGlobalBindlessFixture>;
// Checks that, after device creation with a global bindless heap, the surface state at the
// CPU base of SPECIAL_SSH describes the debug surface, and that SPECIAL_SSH's GPU base
// equals the global heaps base.
HWTEST_F(DebuggerWithGlobalBindlessTest, GivenGlobalBindlessHeapWhenDeviceIsCreatedThenDebugSurfaceStateIsProgrammedAtBindlessOffsetZero) {
    using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE;

    auto *specialSsh = bindlessHelper->getHeap(NEO::BindlessHeapsHelper::SPECIAL_SSH);
    auto *programmedState = reinterpret_cast<RENDER_SURFACE_STATE *>(specialSsh->getCpuBase());
    auto *debugSurfaceAllocation = static_cast<L0::DeviceImp *>(device)->getDebugSurface();

    EXPECT_EQ(bindlessHelper->getGlobalHeapsBase(), specialSsh->getHeapGpuBase());

    // The expected width/height/depth fields are derived from (size - 1); each getter is
    // compared against the corresponding field plus one.
    SURFACE_STATE_BUFFER_LENGTH expectedLength;
    expectedLength.length = static_cast<uint32_t>(debugSurfaceAllocation->getUnderlyingBufferSize() - 1);

    EXPECT_EQ(expectedLength.surfaceState.depth + 1u, programmedState->getDepth());
    EXPECT_EQ(expectedLength.surfaceState.width + 1u, programmedState->getWidth());
    EXPECT_EQ(expectedLength.surfaceState.height + 1u, programmedState->getHeight());
    EXPECT_EQ(debugSurfaceAllocation->getGpuAddress(), programmedState->getSurfaceBaseAddress());
}
// Checks that appending a bindless kernel to a regular command list, with the debugger and
// a global bindless heap active, leaves the command container without a SURFACE_STATE heap.
HWTEST_F(DebuggerWithGlobalBindlessTest, GivenGlobalBindlessHeapWhenAppendingKernelToCmdListThenCmdContainerSshIsNotUsedForDebugSurface) {
    Mock<::L0::KernelImp> kernel;
    kernel.descriptor.kernelAttributes.bufferAddressingMode = NEO::KernelDescriptor::BindlessAndStateless;
    kernel.descriptor.kernelAttributes.imageAddressingMode = NEO::KernelDescriptor::Bindless;

    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    // Stop here if creation failed instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    commandList->close();

    auto ssh = commandList->getCmdContainer().getIndirectHeap(NEO::HeapType::SURFACE_STATE);
    EXPECT_EQ(nullptr, ssh);
}
// Checks that executing a regular command list through a command queue, with the debugger
// and a global bindless heap active, makes the SPECIAL_SSH heap allocation resident.
HWTEST_F(DebuggerWithGlobalBindlessTest, GivenGlobalBindlessHeapWhenExecutingCmdListThenSpecialSshIsResident) {
    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue;
    auto commandQueue = whiteboxCast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, false, returnValue));
    ASSERT_NE(nullptr, commandQueue);

    ze_command_list_handle_t commandLists[] = {
        CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()};
    uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
    auto commandList = CommandList::fromHandle(commandLists[0]);

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    Mock<::L0::KernelImp> kernel;
    auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);
    commandList->close();

    // Start capturing only now, so the list reflects allocations made resident during execution.
    memoryOperationsHandler->captureGfxAllocationsForMakeResident = true;
    result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto allocIter = std::find(memoryOperationsHandler->gfxAllocationsForMakeResident.begin(),
                               memoryOperationsHandler->gfxAllocationsForMakeResident.end(),
                               bindlessHelper->getHeap(NEO::BindlessHeapsHelper::SPECIAL_SSH)->getGraphicsAllocation());
    // The allocation must be FOUND in the captured list, i.e. the iterator must not be end().
    EXPECT_NE(memoryOperationsHandler->gfxAllocationsForMakeResident.end(), allocIter);

    commandList->destroy();
    commandQueue->destroy();
}
// Checks that appending a kernel to an immediate command list, with the debugger and a
// global bindless heap active, makes the SPECIAL_SSH heap allocation resident.
HWTEST_F(DebuggerWithGlobalBindlessTest, GivenGlobalBindlessHeapWhenAppendingToImmCmdListThenSpecialSshIsResident) {
    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue;
    ze_command_list_handle_t commandListHandle = CommandList::createImmediate(productFamily, device, &queueDesc, false, NEO::EngineGroupType::RenderCompute, returnValue);
    auto commandList = CommandList::fromHandle(commandListHandle);

    ze_group_count_t groupCount{1, 1, 1};
    CmdListKernelLaunchParams launchParams = {};
    Mock<::L0::KernelImp> kernel;

    // Capture is enabled before the append, whose residency handling is what the test inspects.
    memoryOperationsHandler->captureGfxAllocationsForMakeResident = true;
    auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    auto allocIter = std::find(memoryOperationsHandler->gfxAllocationsForMakeResident.begin(),
                               memoryOperationsHandler->gfxAllocationsForMakeResident.end(),
                               bindlessHelper->getHeap(NEO::BindlessHeapsHelper::SPECIAL_SSH)->getGraphicsAllocation());
    // The allocation must be FOUND in the captured list, i.e. the iterator must not be end().
    EXPECT_NE(memoryOperationsHandler->gfxAllocationsForMakeResident.end(), allocIter);

    commandList->destroy();
}
} // namespace ult
} // namespace L0