SBA tracking for single address space

Related-To: NEO-6539


Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2022-03-23 08:57:31 +00:00
committed by Compute-Runtime-Automation
parent 4374197c9d
commit beff0019d1
25 changed files with 977 additions and 119 deletions

View File

@ -148,7 +148,11 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
preemptionSize += NEO::PreemptionHelper::getRequiredCmdStreamSize<GfxFamily>(devicePreemption, commandQueuePreemptionMode);
if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) {
debuggerCmdsSize += NEO::PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(neoDevice->getSourceLevelDebugger() != nullptr);
if (neoDevice->getSourceLevelDebugger() != nullptr) {
debuggerCmdsSize += NEO::PreambleHelper<GfxFamily>::getKernelDebuggingCommandsSize(true);
} else if (device->getL0Debugger()) {
debuggerCmdsSize += device->getL0Debugger()->getSbaAddressLoadCommandsSize();
}
}
if (devicePreemption == NEO::PreemptionMode::MidThread) {
@ -275,7 +279,7 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
size_t padding = alignedSize - linearStreamSizeEstimate;
reserveLinearStreamSize(alignedSize);
NEO::LinearStream child(commandStream->getSpace(alignedSize), alignedSize);
child.setGpuBase(ptrOffset(commandStream->getGpuBase(), commandStream->getUsed()));
child.setGpuBase(ptrOffset(commandStream->getGpuBase(), commandStream->getUsed() - alignedSize));
const auto globalFenceAllocation = csr->getGlobalFenceAllocation();
if (globalFenceAllocation) {
@ -307,9 +311,15 @@ ze_result_t CommandQueueHw<gfxCoreFamily>::executeCommandLists(
programPipelineSelect(child);
}
if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed && neoDevice->getSourceLevelDebugger()) {
NEO::PreambleHelper<GfxFamily>::programKernelDebugging(&child);
commandQueueDebugCmdsProgrammed = true;
if (NEO::Debugger::isDebugEnabled(internalUsage) && !commandQueueDebugCmdsProgrammed) {
if (neoDevice->getSourceLevelDebugger()) {
NEO::PreambleHelper<GfxFamily>::programKernelDebugging(&child);
commandQueueDebugCmdsProgrammed = true;
} else if (device->getL0Debugger()) {
device->getL0Debugger()->programSbaAddressLoad(child,
device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress());
commandQueueDebugCmdsProgrammed = true;
}
}
if (gsbaStateDirty) {

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2020 Intel Corporation
# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -9,6 +9,8 @@ set(L0_SRCS_DEBUGGER
${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.cpp
${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.h
${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0.inl
${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_base.inl
${CMAKE_CURRENT_SOURCE_DIR}/debugger_l0_tgllp_and_later.inl
)
add_subdirectories()

View File

@ -24,24 +24,34 @@ DebugerL0CreateFn debuggerL0Factory[IGFX_MAX_CORE] = {};
DebuggerL0::DebuggerL0(NEO::Device *device) : device(device) {
isLegacyMode = false;
initialize();
}
void DebuggerL0::initialize() {
auto &engines = device->getMemoryManager()->getRegisteredEngines();
sbaTrackingGpuVa = device->getMemoryManager()->reserveGpuAddress(MemoryConstants::pageSize, device->getRootDeviceIndex());
if (NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.get() != -1) {
setSingleAddressSpaceSbaTracking(NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.get());
}
auto &engines = device->getMemoryManager()->getRegisteredEngines();
NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, MemoryConstants::pageSize,
NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER,
false,
device->getDeviceBitfield()};
properties.gpuAddress = sbaTrackingGpuVa.address;
if (!singleAddressSpaceSbaTracking) {
sbaTrackingGpuVa = device->getMemoryManager()->reserveGpuAddress(MemoryConstants::pageSize, device->getRootDeviceIndex());
properties.gpuAddress = sbaTrackingGpuVa.address;
}
SbaTrackedAddresses sbaHeader;
for (auto &engine : engines) {
properties.osContext = engine.osContext;
if (!singleAddressSpaceSbaTracking) {
properties.osContext = engine.osContext;
}
auto sbaAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties);
memset(sbaAllocation->getUnderlyingBuffer(), 0, sbaAllocation->getUnderlyingBufferSize());
@ -102,7 +112,9 @@ DebuggerL0 ::~DebuggerL0() {
for (auto &alloc : perContextSbaAllocations) {
device->getMemoryManager()->freeGraphicsMemory(alloc.second);
}
device->getMemoryManager()->freeGpuAddress(sbaTrackingGpuVa, device->getRootDeviceIndex());
if (sbaTrackingGpuVa.size != 0) {
device->getMemoryManager()->freeGpuAddress(sbaTrackingGpuVa, device->getRootDeviceIndex());
}
device->getMemoryManager()->freeGraphicsMemory(moduleDebugArea);
}

View File

@ -12,6 +12,7 @@
#include <level_zero/ze_api.h>
#include <cstdint>
#include <memory>
#include <unordered_map>
@ -99,10 +100,16 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
virtual size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) = 0;
virtual void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) = 0;
virtual size_t getSbaAddressLoadCommandsSize() = 0;
virtual void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) = 0;
MOCKABLE_VIRTUAL bool attachZebinModuleToSegmentAllocations(const StackVec<NEO::GraphicsAllocation *, 32> &kernelAlloc, uint32_t &moduleHandle);
MOCKABLE_VIRTUAL bool removeZebinModule(uint32_t moduleHandle);
// Enables (true) or disables (false) single-address-space SBA tracking by
// updating the singleAddressSpaceSbaTracking flag of this debugger instance.
void setSingleAddressSpaceSbaTracking(bool value) {
singleAddressSpaceSbaTracking = value;
}
protected:
static bool isAnyTrackedAddressChanged(SbaAddresses sba) {
return sba.GeneralStateBaseAddress != 0 ||
@ -116,10 +123,11 @@ class DebuggerL0 : public NEO::Debugger, NEO::NonCopyableOrMovableClass {
NEO::Device *device = nullptr;
NEO::GraphicsAllocation *sbaAllocation = nullptr;
std::unordered_map<uint32_t, NEO::GraphicsAllocation *> perContextSbaAllocations;
NEO::AddressRange sbaTrackingGpuVa;
NEO::AddressRange sbaTrackingGpuVa{};
NEO::GraphicsAllocation *moduleDebugArea = nullptr;
std::atomic<uint32_t> commandQueueCount = 0u;
uint32_t uuidL0CommandQueueHandle = 0;
bool singleAddressSpaceSbaTracking = false;
};
using DebugerL0CreateFn = DebuggerL0 *(*)(NEO::Device *device);
@ -132,6 +140,10 @@ class DebuggerL0Hw : public DebuggerL0 {
size_t getSbaTrackingCommandsSize(size_t trackedAddressCount) override;
void programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) override;
size_t getSbaAddressLoadCommandsSize() override;
void programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) override;
void programSbaTrackingCommandsSingleAddressSpace(NEO::LinearStream &cmdStream, const SbaAddresses &sba);
protected:
DebuggerL0Hw(NEO::Device *device) : DebuggerL0(device){};

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2020-2021 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -16,14 +16,13 @@
namespace L0 {
template <typename GfxFamily>
size_t DebuggerL0Hw<GfxFamily>::getSbaTrackingCommandsSize(size_t trackedAddressCount) {
return trackedAddressCount * NEO::EncodeStoreMemory<GfxFamily>::getStoreDataImmSize();
}
template <typename GfxFamily>
void DebuggerL0Hw<GfxFamily>::programSbaTrackingCommands(NEO::LinearStream &cmdStream, const SbaAddresses &sba) {
auto gpuAddress = NEO::GmmHelper::decanonize(sbaTrackingGpuVa.address);
using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK;
const auto gpuAddress = NEO::GmmHelper::decanonize(sbaTrackingGpuVa.address);
PRINT_DEBUGGER_INFO_LOG("Debugger: SBA stored ssh = %" SCNx64
" gsba = %" SCNx64
@ -34,59 +33,63 @@ void DebuggerL0Hw<GfxFamily>::programSbaTrackingCommands(NEO::LinearStream &cmdS
sba.SurfaceStateBaseAddress, sba.GeneralStateBaseAddress, sba.DynamicStateBaseAddress,
sba.IndirectObjectBaseAddress, sba.InstructionBaseAddress, sba.BindlessSurfaceStateBaseAddress);
if (sba.GeneralStateBaseAddress) {
auto generalStateBaseAddress = NEO::GmmHelper::decanonize(sba.GeneralStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress),
static_cast<uint32_t>(generalStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(generalStateBaseAddress >> 32),
true,
false);
}
if (sba.SurfaceStateBaseAddress) {
auto surfaceStateBaseAddress = NEO::GmmHelper::decanonize(sba.SurfaceStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress),
static_cast<uint32_t>(surfaceStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(surfaceStateBaseAddress >> 32),
true,
false);
}
if (sba.DynamicStateBaseAddress) {
auto dynamicStateBaseAddress = NEO::GmmHelper::decanonize(sba.DynamicStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress),
static_cast<uint32_t>(dynamicStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(dynamicStateBaseAddress >> 32),
true,
false);
}
if (sba.IndirectObjectBaseAddress) {
auto indirectObjectBaseAddress = NEO::GmmHelper::decanonize(sba.IndirectObjectBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress),
static_cast<uint32_t>(indirectObjectBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(indirectObjectBaseAddress >> 32),
true,
false);
}
if (sba.InstructionBaseAddress) {
auto instructionBaseAddress = NEO::GmmHelper::decanonize(sba.InstructionBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, InstructionBaseAddress),
static_cast<uint32_t>(instructionBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(instructionBaseAddress >> 32),
true,
false);
}
if (sba.BindlessSurfaceStateBaseAddress) {
auto bindlessSurfaceStateBaseAddress = NEO::GmmHelper::decanonize(sba.BindlessSurfaceStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress),
static_cast<uint32_t>(bindlessSurfaceStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(bindlessSurfaceStateBaseAddress >> 32),
true,
false);
if (singleAddressSpaceSbaTracking) {
programSbaTrackingCommandsSingleAddressSpace(cmdStream, sba);
} else {
if (sba.GeneralStateBaseAddress) {
auto generalStateBaseAddress = NEO::GmmHelper::decanonize(sba.GeneralStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress),
static_cast<uint32_t>(generalStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(generalStateBaseAddress >> 32),
true,
false);
}
if (sba.SurfaceStateBaseAddress) {
auto surfaceStateBaseAddress = NEO::GmmHelper::decanonize(sba.SurfaceStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress),
static_cast<uint32_t>(surfaceStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(surfaceStateBaseAddress >> 32),
true,
false);
}
if (sba.DynamicStateBaseAddress) {
auto dynamicStateBaseAddress = NEO::GmmHelper::decanonize(sba.DynamicStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress),
static_cast<uint32_t>(dynamicStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(dynamicStateBaseAddress >> 32),
true,
false);
}
if (sba.IndirectObjectBaseAddress) {
auto indirectObjectBaseAddress = NEO::GmmHelper::decanonize(sba.IndirectObjectBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress),
static_cast<uint32_t>(indirectObjectBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(indirectObjectBaseAddress >> 32),
true,
false);
}
if (sba.InstructionBaseAddress) {
auto instructionBaseAddress = NEO::GmmHelper::decanonize(sba.InstructionBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, InstructionBaseAddress),
static_cast<uint32_t>(instructionBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(instructionBaseAddress >> 32),
true,
false);
}
if (sba.BindlessSurfaceStateBaseAddress) {
auto bindlessSurfaceStateBaseAddress = NEO::GmmHelper::decanonize(sba.BindlessSurfaceStateBaseAddress);
NEO::EncodeStoreMemory<GfxFamily>::programStoreDataImm(cmdStream,
gpuAddress + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress),
static_cast<uint32_t>(bindlessSurfaceStateBaseAddress & 0x0000FFFFFFFFULL),
static_cast<uint32_t>(bindlessSurfaceStateBaseAddress >> 32),
true,
false);
}
}
}
@ -95,4 +98,31 @@ DebuggerL0 *DebuggerL0Hw<GfxFamily>::allocate(NEO::Device *device) {
return new DebuggerL0Hw<GfxFamily>(device);
}
// Returns the command-stream space, in bytes, to reserve for the SBA address load:
// the size of two MI_LOAD_REGISTER_IMM commands when single-address-space SBA
// tracking is enabled, and 0 otherwise.
template <typename GfxFamily>
size_t DebuggerL0Hw<GfxFamily>::getSbaAddressLoadCommandsSize() {
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
    return singleAddressSpaceSbaTracking ? 2 * sizeof(MI_LOAD_REGISTER_IMM) : 0;
}
// Programs the 64-bit value sbaGpuVa into CS_GPR_R15 as two 32-bit register loads:
// the lower dword at CS_GPR_R15 and the upper dword at CS_GPR_R15 + 4.
// Emits nothing unless single-address-space SBA tracking is enabled.
template <typename GfxFamily>
void DebuggerL0Hw<GfxFamily>::programSbaAddressLoad(NEO::LinearStream &cmdStream, uint64_t sbaGpuVa) {
    if (singleAddressSpaceSbaTracking) {
        uint32_t lowerDword = static_cast<uint32_t>(sbaGpuVa & 0xffffffff);
        uint32_t upperDword = static_cast<uint32_t>((sbaGpuVa >> 32) & 0xffffffff);

        NEO::LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R15, lowerDword, true);
        NEO::LriHelper<GfxFamily>::program(&cmdStream, CS_GPR_R15 + 4, upperDword, true);
    }
}
} // namespace L0

View File

@ -0,0 +1,22 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace L0 {
// Size, in bytes, of the SBA tracking commands for trackedAddressCount addresses:
// one store-data-imm command per tracked address.
// Single-address-space tracking has no implementation in this variant, so
// requesting a size in that mode is treated as an unrecoverable error.
template <typename GfxFamily>
size_t DebuggerL0Hw<GfxFamily>::getSbaTrackingCommandsSize(size_t trackedAddressCount) {
    if (!singleAddressSpaceSbaTracking) {
        return trackedAddressCount * NEO::EncodeStoreMemory<GfxFamily>::getStoreDataImmSize();
    }
    UNRECOVERABLE_IF(true);
    return 0;
}
// Single-address-space SBA tracking is not implemented in this variant:
// any call is reported as an unrecoverable error and neither cmdStream nor sba is used.
template <typename GfxFamily>
void DebuggerL0Hw<GfxFamily>::programSbaTrackingCommandsSingleAddressSpace(NEO::LinearStream &cmdStream, const SbaAddresses &sba) {
UNRECOVERABLE_IF(true);
}
} // namespace L0

View File

@ -0,0 +1,168 @@
/*
* Copyright (C) 2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
namespace L0 {
// Size, in bytes, of the SBA tracking commands for trackedAddressCount addresses.
//
// Default mode: one store-data-imm command per tracked address.
// Single-address-space mode: two (MI_ARB_CHECK + MI_BATCH_BUFFER_START) pairs
// that bracket the whole sequence, plus for every tracked address one
// MI_LOAD_REGISTER_IMM, one MI_MATH with its ALU instructions, two
// MI_STORE_REGISTER_MEM, three MI_STORE_DATA_IMM and one more
// (MI_ARB_CHECK + MI_BATCH_BUFFER_START) pair.
template <typename GfxFamily>
size_t DebuggerL0Hw<GfxFamily>::getSbaTrackingCommandsSize(size_t trackedAddressCount) {
    if (!singleAddressSpaceSbaTracking) {
        return trackedAddressCount * NEO::EncodeStoreMemory<GfxFamily>::getStoreDataImmSize();
    }

    using MI_MATH = typename GfxFamily::MI_MATH;
    using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
    using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK;
    using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
    using MI_LOAD_REGISTER_IMM = typename GfxFamily::MI_LOAD_REGISTER_IMM;
    using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
    using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;

    constexpr uint32_t aluCmdSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
    constexpr size_t arbCheckAndJumpSize = sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START);
    constexpr size_t perAddressSize = sizeof(MI_LOAD_REGISTER_IMM) +
                                      aluCmdSize +
                                      2 * sizeof(MI_STORE_REGISTER_MEM) +
                                      3 * sizeof(MI_STORE_DATA_IMM) +
                                      arbCheckAndJumpSize;

    return 2 * arbCheckAndJumpSize + trackedAddressCount * perAddressSize;
}
// Programs SBA tracking for single-address-space mode.
//
// For every non-zero base address in sba, a short self-modifying command
// sequence is emitted: the destination (CS_GPR_R15 + offset of the field in
// SbaTrackedAddresses) is computed on the GPU, and both the destination and
// the value are written by the GPU into a placeholder MI_STORE_DATA_IMM that
// follows in the same command stream; that patched command then performs the
// actual store of the tracked address.
// CS_GPR_R15 is expected to hold the SBA tracking buffer address
// (programSbaAddressLoad loads a GPU VA into that register).
//
// cmdStream - stream receiving the commands; its GPU base and CPU base are
//             used to translate CPU write positions into GPU addresses.
// sba       - base addresses to track; fields equal to 0 are skipped.
//
// Aborts via UNRECOVERABLE_IF when single-address-space tracking is not enabled.
template <typename GfxFamily>
void DebuggerL0Hw<GfxFamily>::programSbaTrackingCommandsSingleAddressSpace(NEO::LinearStream &cmdStream, const SbaAddresses &sba) {
using MI_STORE_DATA_IMM = typename GfxFamily::MI_STORE_DATA_IMM;
using MI_STORE_REGISTER_MEM = typename GfxFamily::MI_STORE_REGISTER_MEM;
using MI_BATCH_BUFFER_START = typename GfxFamily::MI_BATCH_BUFFER_START;
using MI_ARB_CHECK = typename GfxFamily::MI_ARB_CHECK;
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
using MI_NOOP = typename GfxFamily::MI_NOOP;
// Byte offsets inside an MI_STORE_DATA_IMM command that get patched at execution time:
// RawData[1] (presumably where the destination address field starts - confirm against
// the command layout) and DataDword0 (start of the immediate data).
const auto offsetToAddress = offsetof(MI_STORE_DATA_IMM, TheStructure.RawData[1]);
const auto offsetToData = offsetof(MI_STORE_DATA_IMM, TheStructure.Common.DataDword0);
UNRECOVERABLE_IF(!singleAddressSpaceSbaTracking);
// Pairs of (offset of the field within SbaTrackedAddresses, decanonized address value),
// collected only for the base addresses that are non-zero.
std::vector<std::pair<size_t, uint64_t>> fieldOffsetAndValue;
if (sba.GeneralStateBaseAddress) {
fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, GeneralStateBaseAddress), NEO::GmmHelper::decanonize(sba.GeneralStateBaseAddress)});
}
if (sba.SurfaceStateBaseAddress) {
fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress), NEO::GmmHelper::decanonize(sba.SurfaceStateBaseAddress)});
}
if (sba.DynamicStateBaseAddress) {
fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, DynamicStateBaseAddress), NEO::GmmHelper::decanonize(sba.DynamicStateBaseAddress)});
}
if (sba.IndirectObjectBaseAddress) {
fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress), NEO::GmmHelper::decanonize(sba.IndirectObjectBaseAddress)});
}
if (sba.InstructionBaseAddress) {
fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, InstructionBaseAddress), NEO::GmmHelper::decanonize(sba.InstructionBaseAddress)});
}
if (sba.BindlessSurfaceStateBaseAddress) {
fieldOffsetAndValue.push_back({offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress), NEO::GmmHelper::decanonize(sba.BindlessSurfaceStateBaseAddress)});
}
// GPU address of a command = cmdStreamGpuBase + (CPU position of the command - cmdStreamCpuBase).
const auto cmdStreamGpuBase = cmdStream.getGpuBase();
const auto cmdStreamCpuBase = reinterpret_cast<uint64_t>(cmdStream.getCpuBase());
// Prologue, emitted only when at least one address is tracked.
if (fieldOffsetAndValue.size()) {
// MI_ARB_CHECK with pre-parser disable set.
// NOTE(review): presumably needed so the commands patched below are not pre-fetched
// before the GPU has modified them - confirm.
auto arb = cmdStream.getSpaceForCmd<MI_ARB_CHECK>();
auto arbCmd = GfxFamily::cmdInitArbCheck;
arbCmd.setPreParserDisable(true);
*arb = arbCmd;
// Jump to the command that immediately follows this MI_BATCH_BUFFER_START
// (the target is taken from the stream position right after reserving the jump).
auto newBuffer = cmdStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
const auto nextCommand = ptrOffset(cmdStreamGpuBase, ptrDiff(reinterpret_cast<uint64_t>(cmdStream.getSpace(0)), cmdStreamCpuBase));
MI_BATCH_BUFFER_START bbCmd = GfxFamily::cmdInitBatchBufferStart;
bbCmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
bbCmd.setBatchBufferStartAddress(nextCommand);
bbCmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH);
*newBuffer = bbCmd;
}
// One self-modifying sequence per tracked address. Space for all commands is reserved
// first, in execution order; the commands are filled in afterwards, once the GPU
// address of the final (patched) MI_STORE_DATA_IMM is known.
for (const auto &pair : fieldOffsetAndValue) {
// Store SBA field offset to R0
NEO::EncodeSetMMIO<GfxFamily>::encodeIMM(cmdStream, CS_GPR_R0, static_cast<uint32_t>(pair.first), true);
// Add GPR0 to GPR15, store result in GPR1
NEO::EncodeMath<GfxFamily>::addition(cmdStream, AluRegisters::R_0, AluRegisters::R_15, AluRegisters::R_1);
// Cmds to store dest address - from GPR
auto miStoreRegMemLow = cmdStream.getSpaceForCmd<MI_STORE_REGISTER_MEM>();
auto miStoreRegMemHigh = cmdStream.getSpaceForCmd<MI_STORE_REGISTER_MEM>();
// Cmds that write the value to track ( SBA address ) into the data dwords of the final SDI
auto miStoreDataSettingSbaBufferAddress = cmdStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
auto miStoreDataSettingSbaBufferAddress2 = cmdStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
auto arb = cmdStream.getSpaceForCmd<MI_ARB_CHECK>();
auto arbCmd = GfxFamily::cmdInitArbCheck;
arbCmd.setPreParserDisable(true);
*arb = arbCmd;
// Jump to SDI command that is modified
auto newBuffer = cmdStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
// GPU address of the SDI reserved next, i.e. the command patched by the stores above
const auto addressOfSDI = ptrOffset(cmdStreamGpuBase, ptrDiff(reinterpret_cast<uint64_t>(cmdStream.getSpace(0)), cmdStreamCpuBase));
// Final SDI: emitted with placeholder address/data, patched on the GPU before it executes
auto miStoreSbaField = cmdStream.getSpaceForCmd<MI_STORE_DATA_IMM>();
auto gpuVaOfAddress = addressOfSDI + offsetToAddress;
auto gpuVaOfData = addressOfSDI + offsetToData;
const auto gpuVaOfDataDWORD1 = gpuVaOfData + 4;
// Write CS_GPR_R1 (low dword) over the final SDI at offsetToAddress
MI_STORE_REGISTER_MEM srmCmdLow = GfxFamily::cmdInitStoreRegisterMem;
srmCmdLow.setRegisterAddress(CS_GPR_R1);
srmCmdLow.setMemoryAddress(gpuVaOfAddress);
NEO::EncodeStoreMMIO<GfxFamily>::remapOffset(&srmCmdLow);
*miStoreRegMemLow = srmCmdLow;
// Write CS_GPR_R1 + 4 (high dword) over the following dword of the final SDI
MI_STORE_REGISTER_MEM srmCmdHigh = GfxFamily::cmdInitStoreRegisterMem;
srmCmdHigh.setRegisterAddress(CS_GPR_R1 + 4);
srmCmdHigh.setMemoryAddress(gpuVaOfAddress + 4);
NEO::EncodeStoreMMIO<GfxFamily>::remapOffset(&srmCmdHigh);
*miStoreRegMemHigh = srmCmdHigh;
// Dword store: low 32 bits of the tracked value -> DataDword0 of the final SDI
MI_STORE_DATA_IMM setSbaBufferAddress = GfxFamily::cmdInitStoreDataImm;
setSbaBufferAddress.setAddress(gpuVaOfData);
setSbaBufferAddress.setStoreQword(false);
setSbaBufferAddress.setDataDword0(pair.second & 0xffffffff);
setSbaBufferAddress.setDataDword1(0);
setSbaBufferAddress.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD);
*miStoreDataSettingSbaBufferAddress = setSbaBufferAddress;
// Dword store: high 32 bits of the tracked value -> the dword 4 bytes after DataDword0
setSbaBufferAddress.setAddress(gpuVaOfDataDWORD1);
setSbaBufferAddress.setStoreQword(false);
setSbaBufferAddress.setDataDword0((pair.second >> 32) & 0xffffffff);
setSbaBufferAddress.setDataDword1(0);
setSbaBufferAddress.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH::DWORD_LENGTH_STORE_DWORD);
*miStoreDataSettingSbaBufferAddress2 = setSbaBufferAddress;
// Jump onto the final SDI
MI_BATCH_BUFFER_START bbCmd = GfxFamily::cmdInitBatchBufferStart;
bbCmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
bbCmd.setBatchBufferStartAddress(addressOfSDI);
bbCmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH);
*newBuffer = bbCmd;
// Placeholder content of the final qword SDI: address 0x0 and data
// 0xdeadbeef / 0xbaadfeed are dummy values overwritten by the stores above.
auto storeSbaField = GfxFamily::cmdInitStoreDataImm;
storeSbaField.setStoreQword(true);
storeSbaField.setAddress(0x0);
storeSbaField.setDataDword0(0xdeadbeef);
storeSbaField.setDataDword1(0xbaadfeed);
storeSbaField.setDwordLength(MI_STORE_DATA_IMM::DWORD_LENGTH_STORE_QWORD);
*miStoreSbaField = storeSbaField;
}
// Epilogue, emitted only when at least one address is tracked.
if (fieldOffsetAndValue.size()) {
// Jump to the command right after this MI_BATCH_BUFFER_START (the MI_ARB_CHECK below)
auto previousBuffer = cmdStream.getSpaceForCmd<MI_BATCH_BUFFER_START>();
const auto addressOfPreviousBuffer = ptrOffset(cmdStreamGpuBase, ptrDiff(reinterpret_cast<uint64_t>(cmdStream.getSpace(0)), cmdStreamCpuBase));
MI_BATCH_BUFFER_START bbCmd = GfxFamily::cmdInitBatchBufferStart;
bbCmd.setAddressSpaceIndicator(MI_BATCH_BUFFER_START::ADDRESS_SPACE_INDICATOR_PPGTT);
bbCmd.setBatchBufferStartAddress(addressOfPreviousBuffer);
bbCmd.setSecondLevelBatchBuffer(MI_BATCH_BUFFER_START::SECOND_LEVEL_BATCH_BUFFER_SECOND_LEVEL_BATCH);
*previousBuffer = bbCmd;
// MI_ARB_CHECK clearing the pre-parser disable set in the prologue
auto arbCmd = GfxFamily::cmdInitArbCheck;
auto arb = cmdStream.getSpaceForCmd<MI_ARB_CHECK>();
arbCmd.setPreParserDisable(false);
*arb = arbCmd;
}
}
} // namespace L0

View File

@ -1,11 +1,12 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/debugger/debugger_l0.inl"
#include "level_zero/core/source/debugger/debugger_l0_base.inl"
namespace NEO {
struct ICLFamily;

View File

@ -1,18 +1,17 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/debugger/debugger_l0.inl"
namespace NEO {
struct TGLLPFamily;
using GfxFamily = TGLLPFamily;
} // namespace NEO
#include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl"
namespace L0 {
template class DebuggerL0Hw<NEO::GfxFamily>;
static DebuggerL0PopulateFactory<IGFX_GEN12LP_CORE, NEO::GfxFamily> debuggerGen12lp;
using Family = NEO::TGLLPFamily;
template class DebuggerL0Hw<Family>;
static DebuggerL0PopulateFactory<IGFX_GEN12LP_CORE, Family> debuggerGen12lp;
} // namespace L0

View File

@ -1,11 +1,12 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/debugger/debugger_l0.inl"
#include "level_zero/core/source/debugger/debugger_l0_base.inl"
namespace NEO {

View File

@ -1,11 +1,12 @@
/*
* Copyright (C) 2020 Intel Corporation
* Copyright (C) 2020-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/debugger/debugger_l0.inl"
#include "level_zero/core/source/debugger/debugger_l0_base.inl"
namespace NEO {
struct SKLFamily;
@ -13,6 +14,7 @@ using GfxFamily = SKLFamily;
} // namespace NEO
namespace L0 {
template class DebuggerL0Hw<NEO::GfxFamily>;
static DebuggerL0PopulateFactory<IGFX_GEN9_CORE, NEO::GfxFamily> debuggerGen9;
} // namespace L0

View File

@ -1,5 +1,5 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@ -9,8 +9,8 @@
namespace L0 {
template <typename Family>
void L0HwHelperHw<Family>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const {
template <typename GfxFamily>
void L0HwHelperHw<GfxFamily>::setAdditionalGroupProperty(ze_command_queue_group_properties_t &groupProperty, NEO::EngineGroupType groupType) const {
}
} // namespace L0

View File

@ -1,19 +1,18 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/debugger/debugger_l0.inl"
namespace NEO {
struct XeHpFamily;
using GfxFamily = XeHpFamily;
} // namespace NEO
#include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl"
namespace L0 {
template class DebuggerL0Hw<NEO::GfxFamily>;
DebuggerL0PopulateFactory<IGFX_XE_HP_CORE, NEO::GfxFamily> debuggerXE_HP_CORE;
using Family = NEO::XeHpFamily;
DebuggerL0PopulateFactory<IGFX_XE_HP_CORE, Family> debuggerXE_HP_CORE;
template class DebuggerL0Hw<Family>;
} // namespace L0

View File

@ -1,19 +1,16 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/debugger/debugger_l0.inl"
namespace NEO {
struct XE_HPC_COREFamily;
using GfxFamily = XE_HPC_COREFamily;
} // namespace NEO
#include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl"
namespace L0 {
template class DebuggerL0Hw<NEO::GfxFamily>;
DebuggerL0PopulateFactory<IGFX_XE_HPC_CORE, NEO::GfxFamily> debuggerXeHpcCore;
using Family = NEO::XE_HPC_COREFamily;
template class DebuggerL0Hw<Family>;
static DebuggerL0PopulateFactory<IGFX_XE_HPC_CORE, Family> debuggerXeHpcCore;
} // namespace L0

View File

@ -1,19 +1,16 @@
/*
* Copyright (C) 2021 Intel Corporation
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "level_zero/core/source/debugger/debugger_l0.inl"
namespace NEO {
struct XE_HPG_COREFamily;
using GfxFamily = XE_HPG_COREFamily;
} // namespace NEO
#include "level_zero/core/source/debugger/debugger_l0_tgllp_and_later.inl"
namespace L0 {
template class DebuggerL0Hw<NEO::GfxFamily>;
DebuggerL0PopulateFactory<IGFX_XE_HPG_CORE, NEO::GfxFamily> debuggerXeHpgCore;
using Family = NEO::XE_HPG_COREFamily;
template class DebuggerL0Hw<Family>;
static DebuggerL0PopulateFactory<IGFX_XE_HPG_CORE, Family> debuggerXeHpgCore;
} // namespace L0

View File

@ -0,0 +1,10 @@
#
# Copyright (C) 2021-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
# Add the debugger AUB test sources (and this CMakeLists.txt itself)
# to the ze_intel_gpu_aub_tests target.
target_sources(ze_intel_gpu_aub_tests PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/CMakeLists.txt
${CMAKE_CURRENT_SOURCE_DIR}/debugger_aub_tests.cpp
)

View File

@ -0,0 +1,149 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/source/gmm_helper/gmm_helper.h"
#include "shared/source/helpers/array_count.h"
#include "shared/source/helpers/file_io.h"
#include "shared/test/common/helpers/debug_manager_state_restore.h"
#include "shared/test/common/helpers/default_hw_info.h"
#include "shared/test/common/helpers/test_files.h"
#include "shared/test/common/helpers/variable_backup.h"
#include "shared/test/common/mocks/mock_device.h"
#include "shared/test/common/mocks/mock_io_functions.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/module/module_imp.h"
#include "level_zero/core/test/aub_tests/fixtures/aub_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_driver_handle.h"
namespace L0 {
namespace ult {
// AUB test fixture for debugger scenarios.
// Owns the module created by createModuleFromFile() and destroys it in TearDown().
struct DebuggerAub : Test<AUBFixtureL0> {
    // Sets up the AUB fixture for the default hardware info.
    // NOTE(review): the second argument (true) presumably enables debugging - confirm against AUBFixtureL0::SetUp.
    void SetUp() override {
        AUBFixtureL0::SetUp(NEO::defaultHwInfo.get(), true);
    }

    // Destroys the module (if one was created) before tearing down the AUB fixture.
    void TearDown() override {
        // module stays nullptr when createModuleFromFile() was never called or
        // bailed out on a fatal assertion before creating it; guard the dereference.
        if (module != nullptr) {
            module->destroy();
            module = nullptr;
        }
        AUBFixtureL0::TearDown();
    }

    // Loads the native binary for the kernel file `fileName` and builds `module` from it.
    //
    // fileName - base name of the binary; the API-specific file name is resolved with
    //            the "_" suffix and ".bin" extension.
    // context  - not used by this helper.
    // device   - device the module is created and initialized for.
    //
    // Raises fatal test assertions when the binary cannot be loaded or the module
    // fails to initialize.
    void createModuleFromFile(const std::string &fileName, ze_context_handle_t context, L0::Device *device) {
        std::string testFile;
        retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin");

        size_t size = 0;
        auto src = loadDataFromFile(
            testFile.c_str(),
            size);
        ASSERT_NE(0u, size);
        ASSERT_NE(nullptr, src);

        ze_module_desc_t moduleDesc = {ZE_STRUCTURE_TYPE_MODULE_DESC};
        moduleDesc.format = ZE_MODULE_FORMAT_NATIVE;
        moduleDesc.pInputModule = reinterpret_cast<const uint8_t *>(src.get());
        moduleDesc.inputSize = size;
        moduleDesc.pBuildFlags = "";

        module = new ModuleImp(device, nullptr, ModuleType::User);
        bool success = module->initialize(&moduleDesc, device->getNEODevice());
        ASSERT_TRUE(success);
    }

    DebugManagerStateRestore restorer; // restores debug flags modified by the tests
    ModuleImp *module = nullptr;       // created by createModuleFromFile(), destroyed in TearDown()
};
// DebuggerAub variant that forces single-address-space SBA tracking.
// The DebuggerForceSbaTrackingMode flag is set to 1 before the base fixture is
// set up; tear-down is inherited unchanged from DebuggerAub.
struct DebuggerSingleAddressSpaceAub : public DebuggerAub {
    void SetUp() override {
        NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(1);
        DebuggerAub::SetUp();
    }
};
// Matcher restricting the test to gfx cores from IGFX_GEN12LP_CORE to IGFX_XE_HP_CORE.
using IsBetweenGen12LpAndXeHp = IsWithinGfxCore<IGFX_GEN12LP_CORE, IGFX_XE_HP_CORE>;
// Runs a kernel with single-address-space SBA tracking forced on and verifies, through
// AUB expectations, that:
//  - the kernel wrote the expected values to the destination buffer,
//  - CS_GPR_R15 holds the GPU address of the SBA tracking buffer of the used context,
//  - the surface state, dynamic state, instruction and bindless surface state base
//    addresses were stored in the SBA tracking buffer.
HWTEST2_F(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExecutedThenSbaAddressesAreTracked, IsBetweenGen12LpAndXeHp) {
constexpr size_t bufferSize = MemoryConstants::pageSize;
const uint32_t groupSize[] = {32, 1, 1};
const uint32_t groupCount[] = {bufferSize / 32, 1, 1};
// Values expected at the start of the destination buffer after the kernel has run
const uint32_t expectedSizes[] = {bufferSize, 1, 1};
NEO::DebugManager.flags.UpdateCrossThreadDataSize.set(true);
// Zero-initialized host unified memory used as the kernel's destination buffer
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::HOST_UNIFIED_MEMORY,
context->rootDeviceIndices,
context->deviceBitfields);
auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
memset(bufferDst, 0, bufferSize);
// Initialize the simulated engine and write the buffer's allocation to it
auto simulatedCsr = AUBFixtureL0::getSimulatedCsr<FamilyType>();
simulatedCsr->initializeEngine();
simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation());
ze_group_count_t dispatchTraits;
dispatchTraits.groupCountX = groupCount[0];
dispatchTraits.groupCountY = groupCount[1];
dispatchTraits.groupCountZ = groupCount[2];
// Build the test module and create the kernel under test
createModuleFromFile("test_kernel", context, device);
ze_kernel_handle_t kernel;
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
kernelDesc.pKernelName = "test_get_global_sizes";
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel));
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferDst));
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2]));
// Record the launch, then execute the command list and wait for completion
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr));
commandList->close();
pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false);
pCmdq->synchronize(std::numeric_limits<uint32_t>::max());
// Kernel result: the destination buffer must start with expectedSizes
expectMemory<FamilyType>(reinterpret_cast<void *>(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()),
expectedSizes, sizeof(expectedSizes));
// CS_GPR_R15 (low dword) and CS_GPR_R15 + 4 (high dword) must hold the SBA tracking buffer address
const auto sbaAddress = device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress();
uint32_t low = sbaAddress & 0xffffffff;
uint32_t high = (sbaAddress >> 32) & 0xffffffff;
expectMMIO<FamilyType>(CS_GPR_R15, low);
expectMMIO<FamilyType>(CS_GPR_R15 + 4, high);
// Reference values for the tracked base addresses
auto instructionHeapBaseAddress = neoDevice->getMemoryManager()->getInternalHeapBaseAddress(device->getRootDeviceIndex(), neoDevice->getMemoryManager()->isLocalMemoryUsedForIsa(neoDevice->getRootDeviceIndex()));
auto dynamicStateBaseAddress = NEO::GmmHelper::decanonize(commandList->commandContainer.getIndirectHeap(HeapType::DYNAMIC_STATE)->getGraphicsAllocation()->getGpuAddress());
auto surfaceStateBaseAddress = NEO::GmmHelper::decanonize(commandList->commandContainer.getIndirectHeap(HeapType::SURFACE_STATE)->getGraphicsAllocation()->getGpuAddress());
// Each tracked field of the SBA tracking buffer must contain the matching base address
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress)),
&surfaceStateBaseAddress, sizeof(surfaceStateBaseAddress));
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress)),
&dynamicStateBaseAddress, sizeof(dynamicStateBaseAddress));
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(SbaTrackedAddresses, InstructionBaseAddress)),
&instructionHeapBaseAddress, sizeof(instructionHeapBaseAddress));
// The bindless surface state base address is expected to equal the surface state base address
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress)),
&surfaceStateBaseAddress, sizeof(surfaceStateBaseAddress));
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel));
driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst);
}
} // namespace ult
} // namespace L0

View File

@ -5,6 +5,7 @@
*
*/
#include "shared/source/command_stream/aub_command_stream_receiver_hw.h"
#include "shared/source/command_stream/command_stream_receiver_simulated_common_hw.h"
#include "shared/source/command_stream/command_stream_receiver_with_aub_dump.h"
#include "shared/source/command_stream/tbx_command_stream_receiver_hw.h"
@ -82,6 +83,18 @@ class AUBFixtureL0 {
}
}
template <typename FamilyType>
void expectMMIO(uint32_t mmioRegister, uint32_t expectedValue) {
NEO::AUBCommandStreamReceiverHw<FamilyType> *aubCsr = static_cast<NEO::AUBCommandStreamReceiverHw<FamilyType> *>(csr);
if (NEO::testMode == NEO::TestMode::AubTestsWithTbx) {
aubCsr = static_cast<NEO::AUBCommandStreamReceiverHw<FamilyType> *>(static_cast<NEO::CommandStreamReceiverWithAUBDump<NEO::TbxCommandStreamReceiverHw<FamilyType>> *>(csr)->aubCSR.get());
}
if (aubCsr) {
aubCsr->expectMMIO(mmioRegister, expectedValue);
}
}
const uint32_t rootDeviceIndex = 0;
NEO::ExecutionEnvironment *executionEnvironment;
NEO::MemoryManager *memoryManager = nullptr;

View File

@ -20,6 +20,7 @@ class MockDebuggerL0Hw : public L0::DebuggerL0Hw<GfxFamily> {
public:
using L0::DebuggerL0::perContextSbaAllocations;
using L0::DebuggerL0::sbaTrackingGpuVa;
using L0::DebuggerL0::singleAddressSpaceSbaTracking;
MockDebuggerL0Hw(NEO::Device *device) : L0::DebuggerL0Hw<GfxFamily>(device) {}
~MockDebuggerL0Hw() override = default;

View File

@ -1,5 +1,5 @@
#
# Copyright (C) 2020-2021 Intel Corporation
# Copyright (C) 2020-2022 Intel Corporation
#
# SPDX-License-Identifier: MIT
#
@ -11,6 +11,7 @@ target_sources(${TARGET_NAME} PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/test_source_level_debugger.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_1.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_2.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_single_address_space.cpp
${CMAKE_CURRENT_SOURCE_DIR}/test_module_with_debug.cpp
)

View File

@ -0,0 +1,406 @@
/*
* Copyright (C) 2021-2022 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
*/
#include "shared/test/common/cmd_parse/gen_cmd_parse.h"
#include "shared/test/common/test_macros/test.h"
#include "level_zero/core/source/image/image_hw.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h"
namespace L0 {
namespace ult {
// Device-level fixture that sets DebuggerForceSbaTrackingMode to 1
// (single address space) before delegating to the base fixture's SetUp().
struct SingleAddressSpaceFixture : public Test<DeviceFixture> {
    // Kept as a member so it lives for the whole test;
    // presumably restores the debug flags on destruction - confirm against DebugManagerStateRestore.
    DebugManagerStateRestore restorer;

    void SetUp() override {
        // The flag is written first so the base SetUp() already observes it.
        NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(1);
        Test<DeviceFixture>::SetUp();
    }

    void TearDown() override {
        Test<DeviceFixture>::TearDown();
    }
};
// L0 debugger fixture variant that sets DebuggerForceSbaTrackingMode to 1
// (single address space) before delegating to the base fixture's SetUp().
struct L0DebuggerSingleAddressSpace : public Test<L0DebuggerHwFixture> {
    // Kept as a member so it lives for the whole test;
    // presumably restores the debug flags on destruction - confirm against DebugManagerStateRestore.
    DebugManagerStateRestore restorer;

    void SetUp() override {
        // The flag is written first so the base SetUp() already observes it.
        NEO::DebugManager.flags.DebuggerForceSbaTrackingMode.set(1);
        Test<L0DebuggerHwFixture>::SetUp();
    }

    void TearDown() override {
        Test<L0DebuggerHwFixture>::TearDown();
    }
};
// Checks that a debugger created with DebuggerForceSbaTrackingMode = 1 reports
// single address space SBA tracking, and one created with the flag = 0 does not.
HWTEST_F(SingleAddressSpaceFixture, givenDebugFlagForceSbaTrackingModeSetWhenDebuggerIsCreatedThenItHasCorrectSingleAddressSpaceValue) {
    DebugManagerStateRestore flagsGuard;
    auto &forceSbaTrackingMode = NEO::DebugManager.flags.DebuggerForceSbaTrackingMode;

    // Flag == 1: single address space expected.
    forceSbaTrackingMode.set(1);
    auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);
    EXPECT_TRUE(debugger->singleAddressSpaceSbaTracking);

    // Flag == 0: a freshly created debugger must not use single address space.
    forceSbaTrackingMode.set(0);
    debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);
    EXPECT_FALSE(debugger->singleAddressSpaceSbaTracking);
}
// With single address space SBA tracking (forced by the fixture) the debugger
// must not reserve a common SBA tracking GPU VA; every registered engine context
// is expected to have its own DEBUG_SBA_TRACKING_BUFFER allocation.
HWTEST_F(SingleAddressSpaceFixture, givenSingleAddressSpaceWhenDebuggerIsCreatedThenSbaTrackingGpuVaIsNotReserved) {
    auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);

    EXPECT_EQ(0u, debugger->sbaTrackingGpuVa.address);
    EXPECT_EQ(0u, debugger->sbaTrackingGpuVa.size);
    EXPECT_EQ(0u, debugger->getSbaTrackingGpuVa());

    std::vector<NEO::GraphicsAllocation *> allocations;
    auto &allEngines = device->getNEODevice()->getMemoryManager()->getRegisteredEngines();

    for (auto &engine : allEngines) {
        auto sbaAllocation = debugger->getSbaTrackingBuffer(engine.osContext->getContextId());
        ASSERT_NE(nullptr, sbaAllocation);
        allocations.push_back(sbaAllocation);

        EXPECT_EQ(NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, sbaAllocation->getAllocationType());
    }

    // Neighbouring allocations must live at different GPU addresses.
    // `i + 1 < size()` is used instead of `i < size() - 1`: the latter wraps
    // around to a huge unsigned value when the vector is empty and would index
    // out of bounds.
    for (size_t i = 0; i + 1 < allocations.size(); i++) {
        EXPECT_NE(allocations[i]->getGpuAddress(), allocations[i + 1]->getGpuAddress());
    }
}
// On platforms up to Gen11 both programming the single address space SBA
// tracking commands and querying their size are expected to throw, and nothing
// may be written to the command stream.
HWTEST2_F(SingleAddressSpaceFixture, WhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenAbortIsCalledAndNoCommandsAreAddedToStream, IsAtMostGen11) {
    auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);

    // The unused MI_* type aliases of the original test were dropped: no command
    // is parsed or sized here.
    StackVec<char, 4096> buffer(4096);
    NEO::LinearStream cmdStream(buffer.begin(), buffer.size());

    uint64_t gsba = 0x60000;
    uint64_t ssba = 0x1234567000;
    uint64_t iba = 0xfff80000;
    uint64_t ioba = 0x8100000;
    uint64_t dsba = 0xffff0000aaaa0000;

    NEO::Debugger::SbaAddresses sbaAddresses = {};
    sbaAddresses.GeneralStateBaseAddress = gsba;
    sbaAddresses.SurfaceStateBaseAddress = ssba;
    sbaAddresses.InstructionBaseAddress = iba;
    sbaAddresses.IndirectObjectBaseAddress = ioba;
    sbaAddresses.DynamicStateBaseAddress = dsba;
    sbaAddresses.BindlessSurfaceStateBaseAddress = ssba;

    EXPECT_THROW(debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, sbaAddresses), std::exception);

    EXPECT_EQ(0u, cmdStream.getUsed());
    EXPECT_THROW(debugger->getSbaTrackingCommandsSize(6), std::exception);
}
// Programs single address space SBA tracking for six non-zero base addresses
// and verifies both the total size of the emitted commands and their order:
//   ARB_CHECK, BB_START,
//   6 x { LRI(R0), MI_MATH, 2 x SRM, 2 x SDI, ARB_CHECK, BB_START, SDI },
//   BB_START, ARB_CHECK (pre-parser not disabled).
HWTEST2_F(SingleAddressSpaceFixture, GivenNonZeroSbaAddressesWhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenCorrectSequenceOfCommandsAreAddedToStream, IsAtLeastGen12lp) {
    auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    AllocationProperties commandBufferProperties = {device->getRootDeviceIndex(),
                                                    true,
                                                    MemoryConstants::pageSize,
                                                    AllocationType::COMMAND_BUFFER,
                                                    false,
                                                    device->getNEODevice()->getDeviceBitfield()};
    auto streamAllocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    ASSERT_NE(nullptr, streamAllocation);

    NEO::LinearStream cmdStream;
    cmdStream.replaceGraphicsAllocation(streamAllocation);
    cmdStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), streamAllocation->getUnderlyingBufferSize());

    uint64_t gsba = 0x60000;
    uint64_t ssba = 0x1234567000;
    uint64_t iba = 0xfff80000;
    uint64_t ioba = 0x8100000;
    uint64_t dsba = 0xffff0000aaaa0000;

    NEO::Debugger::SbaAddresses sbaAddresses = {};
    sbaAddresses.GeneralStateBaseAddress = gsba;
    sbaAddresses.SurfaceStateBaseAddress = ssba;
    sbaAddresses.InstructionBaseAddress = iba;
    sbaAddresses.IndirectObjectBaseAddress = ioba;
    sbaAddresses.DynamicStateBaseAddress = dsba;
    sbaAddresses.BindlessSurfaceStateBaseAddress = ssba;

    debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, sbaAddresses);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed()));

    // Prologue (ARB_CHECK + BB_START), six per-address sequences, epilogue.
    size_t sizeExpected = sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START);

    for (int i = 0; i < 6; i++) {
        sizeExpected += NEO::EncodeSetMMIO<FamilyType>::sizeIMM;
        sizeExpected += NEO::EncodeMath<FamilyType>::streamCommandSize;
        sizeExpected += 2 * sizeof(MI_STORE_REGISTER_MEM);
        sizeExpected += 2 * sizeof(MI_STORE_DATA_IMM);
        sizeExpected += sizeof(MI_ARB_CHECK);
        sizeExpected += sizeof(MI_BATCH_BUFFER_START);
        sizeExpected += sizeof(MI_STORE_DATA_IMM);
    }

    sizeExpected += sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START);

    EXPECT_EQ(sizeExpected, cmdStream.getUsed());
    EXPECT_EQ(sizeExpected, debugger->getSbaTrackingCommandsSize(6));

    auto itor = find<MI_ARB_CHECK *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    for (int i = 0; i < 6; i++) {
        itor = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
        EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset());

        itor = find<MI_MATH *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        // find() begins matching at the iterator it is given (see gen_cmd_parse.h),
        // so step past the first SRM; otherwise the same command is matched twice
        // and the presence of the second SRM is never checked.
        ++itor;
        itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        itor = find<MI_STORE_DATA_IMM *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        // Same reasoning as above: require a second, distinct SDI.
        ++itor;
        itor = find<MI_STORE_DATA_IMM *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        itor = find<MI_ARB_CHECK *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        itor = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);

        itor = find<MI_STORE_DATA_IMM *>(itor, cmdList.end());
        ASSERT_NE(cmdList.end(), itor);
    }

    itor = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_ARB_CHECK *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto miArb = genCmdCast<MI_ARB_CHECK *>(*itor);

    EXPECT_FALSE(miArb->getPreParserDisable());

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(streamAllocation);
}
// Programs single address space SBA tracking with only SurfaceStateBaseAddress
// set and verifies that exactly one per-address command sequence is emitted
// between the prologue (ARB_CHECK, BB_START) and the epilogue (BB_START, ARB_CHECK).
HWTEST2_F(SingleAddressSpaceFixture, GivenOneNonZeroSbaAddressesWhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenONlyPartOfCommandsAreAddedToStream, IsAtLeastGen12lp) {
    auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);
    using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
    using MI_ARB_CHECK = typename FamilyType::MI_ARB_CHECK;
    using MI_BATCH_BUFFER_START = typename FamilyType::MI_BATCH_BUFFER_START;
    using MI_STORE_REGISTER_MEM = typename FamilyType::MI_STORE_REGISTER_MEM;
    using MI_MATH = typename FamilyType::MI_MATH;
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    AllocationProperties commandBufferProperties = {device->getRootDeviceIndex(),
                                                    true,
                                                    MemoryConstants::pageSize,
                                                    AllocationType::COMMAND_BUFFER,
                                                    false,
                                                    device->getNEODevice()->getDeviceBitfield()};
    auto streamAllocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(commandBufferProperties);
    ASSERT_NE(nullptr, streamAllocation);

    NEO::LinearStream cmdStream;
    cmdStream.replaceGraphicsAllocation(streamAllocation);
    cmdStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), streamAllocation->getUnderlyingBufferSize());

    uint64_t ssba = 0x1234567000;

    NEO::Debugger::SbaAddresses sbaAddresses = {0};
    sbaAddresses.SurfaceStateBaseAddress = ssba;

    debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, sbaAddresses);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed()));

    // Prologue, a single per-address sequence, epilogue.
    size_t sizeExpected = sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START);

    sizeExpected += NEO::EncodeSetMMIO<FamilyType>::sizeIMM;
    sizeExpected += NEO::EncodeMath<FamilyType>::streamCommandSize;
    sizeExpected += 2 * sizeof(MI_STORE_REGISTER_MEM);
    sizeExpected += 2 * sizeof(MI_STORE_DATA_IMM);
    sizeExpected += sizeof(MI_ARB_CHECK);
    sizeExpected += sizeof(MI_BATCH_BUFFER_START);
    sizeExpected += sizeof(MI_STORE_DATA_IMM);

    sizeExpected += sizeof(MI_ARB_CHECK) + sizeof(MI_BATCH_BUFFER_START);

    EXPECT_EQ(sizeExpected, cmdStream.getUsed());
    EXPECT_EQ(sizeExpected, debugger->getSbaTrackingCommandsSize(1));

    auto itor = find<MI_ARB_CHECK *>(cmdList.begin(), cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_LOAD_REGISTER_IMM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    auto lri = genCmdCast<MI_LOAD_REGISTER_IMM *>(*itor);
    EXPECT_EQ(CS_GPR_R0, lri->getRegisterOffset());

    itor = find<MI_MATH *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    // find() begins matching at the iterator it is given (see gen_cmd_parse.h),
    // so step past the first SRM; otherwise the same command is matched twice
    // and the presence of the second SRM is never checked.
    ++itor;
    itor = find<MI_STORE_REGISTER_MEM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_STORE_DATA_IMM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    // Same reasoning as above: require a second, distinct SDI.
    ++itor;
    itor = find<MI_STORE_DATA_IMM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_ARB_CHECK *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_STORE_DATA_IMM *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_BATCH_BUFFER_START *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);

    itor = find<MI_ARB_CHECK *>(itor, cmdList.end());
    ASSERT_NE(cmdList.end(), itor);
    auto miArb = genCmdCast<MI_ARB_CHECK *>(*itor);

    EXPECT_FALSE(miArb->getPreParserDisable());

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(streamAllocation);
}
// When every tracked base address is zero, programming the single address space
// SBA tracking commands must leave the command stream untouched.
HWTEST2_F(SingleAddressSpaceFixture, GivenAllZeroSbaAddressesWhenProgrammingSbaTrackingCommandsForSingleAddressSpaceThenNoCommandsAreAddedToStream, IsAtLeastGen12lp) {
    auto debugger = std::make_unique<MockDebuggerL0Hw<FamilyType>>(neoDevice);

    // Back the stream with a real command buffer allocation.
    AllocationProperties streamProperties{device->getRootDeviceIndex(),
                                          true,
                                          MemoryConstants::pageSize,
                                          AllocationType::COMMAND_BUFFER,
                                          false,
                                          device->getNEODevice()->getDeviceBitfield()};
    auto streamAllocation = device->getNEODevice()->getMemoryManager()->allocateGraphicsMemoryWithProperties(streamProperties);
    ASSERT_NE(nullptr, streamAllocation);

    NEO::LinearStream cmdStream;
    cmdStream.replaceGraphicsAllocation(streamAllocation);
    cmdStream.replaceBuffer(streamAllocation->getUnderlyingBuffer(), streamAllocation->getUnderlyingBufferSize());

    NEO::Debugger::SbaAddresses allZeroAddresses = {0};
    debugger->programSbaTrackingCommandsSingleAddressSpace(cmdStream, allZeroAddresses);

    const size_t expectedUsed = 0;
    EXPECT_EQ(expectedUsed, cmdStream.getUsed());

    device->getNEODevice()->getMemoryManager()->freeGraphicsMemory(streamAllocation);
}
// Executes an empty command list on a queue with the L0 debugger in single
// address space mode and verifies that the queue's command stream contains two
// consecutive MI_LOAD_REGISTER_IMM commands loading the low and high dwords of
// the per-context SBA tracking buffer address into CS_GPR_R15.
HWTEST2_F(L0DebuggerSingleAddressSpace, givenDebuggingEnabledWhenCommandListIsExecutedThenValidKernelDebugCommandsAreAdded, IsAtLeastGen12lp) {
    using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM;

    ze_command_queue_desc_t queueDesc = {};
    ze_result_t returnValue;
    auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue));
    // Fail cleanly instead of dereferencing a null queue below.
    ASSERT_NE(nullptr, commandQueue);
    ASSERT_NE(nullptr, commandQueue->commandStream);

    auto usedSpaceBefore = commandQueue->commandStream->getUsed();

    ze_command_list_handle_t commandLists[] = {
        CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()};
    uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);

    auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true);
    ASSERT_EQ(ZE_RESULT_SUCCESS, result);

    auto usedSpaceAfter = commandQueue->commandStream->getUsed();
    ASSERT_GT(usedSpaceAfter, usedSpaceBefore);

    GenCmdList cmdList;
    ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(
        cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter));

    auto miLoadImm = findAll<MI_LOAD_REGISTER_IMM *>(cmdList.begin(), cmdList.end());

    size_t gpr15RegisterCount = 0;

    size_t gprMiLoadindex = std::numeric_limits<size_t>::max();

    for (size_t i = 0; i < miLoadImm.size(); i++) {
        MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[i]);
        ASSERT_NE(nullptr, miLoad);

        if (miLoad->getRegisterOffset() == CS_GPR_R15) {
            gpr15RegisterCount++;
            gprMiLoadindex = i;
        }
        if (miLoad->getRegisterOffset() == CS_GPR_R15 + 4) {
            gpr15RegisterCount++;
        }
    }

    // 2 LRI commands to store SBA buffer address
    EXPECT_EQ(2u, gpr15RegisterCount);

    // The index defaults to size_t max when no LRI targets CS_GPR_R15, and the
    // high-dword LRI is read from the following slot. Stop here rather than
    // index miLoadImm out of bounds.
    ASSERT_NE(std::numeric_limits<size_t>::max(), gprMiLoadindex);
    ASSERT_LT(gprMiLoadindex + 1, miLoadImm.size());

    auto sbaGpuVa = getMockDebuggerL0Hw<FamilyType>()->getSbaTrackingBuffer(commandQueue->getCsr()->getOsContext().getContextId())->getGpuAddress();
    uint32_t low = sbaGpuVa & 0xffffffff;
    uint32_t high = (sbaGpuVa >> 32) & 0xffffffff;

    MI_LOAD_REGISTER_IMM *miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[gprMiLoadindex]);
    EXPECT_EQ(CS_GPR_R15, miLoad->getRegisterOffset());
    EXPECT_EQ(low, miLoad->getDataDword());

    miLoad = genCmdCast<MI_LOAD_REGISTER_IMM *>(*miLoadImm[gprMiLoadindex + 1]);
    EXPECT_EQ(CS_GPR_R15 + 4, miLoad->getRegisterOffset());
    EXPECT_EQ(high, miLoad->getDataDword());

    for (auto i = 0u; i < numCommandLists; i++) {
        auto commandList = CommandList::fromHandle(commandLists[i]);
        commandList->destroy();
    }

    commandQueue->destroy();
}
} // namespace ult
} // namespace L0

View File

@ -252,6 +252,7 @@ DebuggerLogBitmask = 0
GTPinAllocateBufferInSharedMemory = -1
DeferOsContextInitialization = -1
DebuggerOptDisable = -1
DebuggerForceSbaTrackingMode = -1
ExperimentalEnableCustomLocalMemoryAlignment = 0
AlignLocalMemoryVaTo2MB = -1
EngineInstancedSubDevices = 0

View File

@ -122,8 +122,10 @@ template <typename GfxFamily>
struct EncodeMath {
using MI_MATH_ALU_INST_INLINE = typename GfxFamily::MI_MATH_ALU_INST_INLINE;
using MI_MATH = typename GfxFamily::MI_MATH;
constexpr static size_t streamCommandSize = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
static uint32_t *commandReserve(CommandContainer &container);
static uint32_t *commandReserve(LinearStream &cmdStream);
static void greaterThan(CommandContainer &container,
AluRegisters firstOperandRegister,
AluRegisters secondOperandRegister,
@ -132,6 +134,10 @@ struct EncodeMath {
AluRegisters firstOperandRegister,
AluRegisters secondOperandRegister,
AluRegisters finalResultRegister);
static void addition(LinearStream &cmdStream,
AluRegisters firstOperandRegister,
AluRegisters secondOperandRegister,
AluRegisters finalResultRegister);
static void bitwiseAnd(CommandContainer &container,
AluRegisters firstOperandRegister,
AluRegisters secondOperandRegister,

View File

@ -200,9 +200,14 @@ void EncodeMathMMIO<Family>::encodeAlu(MI_MATH_ALU_INST_INLINE *pAluParam, AluRe
// Container flavour of commandReserve(): reserves the MI_MATH space in the
// container's own command stream by delegating to the LinearStream overload.
template <typename Family>
uint32_t *EncodeMath<Family>::commandReserve(CommandContainer &container) {
    auto &containerStream = *container.getCommandStream();
    return commandReserve(containerStream);
}
template <typename Family>
uint32_t *EncodeMath<Family>::commandReserve(LinearStream &cmdStream) {
size_t size = sizeof(MI_MATH) + sizeof(MI_MATH_ALU_INST_INLINE) * NUM_ALU_INST_FOR_READ_MODIFY_WRITE;
auto cmd = reinterpret_cast<uint32_t *>(container.getCommandStream()->getSpace(size));
auto cmd = reinterpret_cast<uint32_t *>(cmdStream.getSpace(size));
MI_MATH mathBuffer;
mathBuffer.DW0.Value = 0x0;
mathBuffer.DW0.BitField.InstructionType = MI_MATH::COMMAND_TYPE_MI_COMMAND;
@ -267,6 +272,19 @@ void EncodeMath<Family>::addition(CommandContainer &container,
finalResultRegister);
}
// Encodes an ALU addition into `cmdStream`: reserves the MI_MATH command space
// in the stream and passes it, together with the two operand registers and the
// result register, to EncodeMathMMIO::encodeAluAdd.
template <typename Family>
void EncodeMath<Family>::addition(LinearStream &cmdStream,
                                  AluRegisters firstOperandRegister,
                                  AluRegisters secondOperandRegister,
                                  AluRegisters finalResultRegister) {
    uint32_t *reservedSpace = EncodeMath<Family>::commandReserve(cmdStream);
    auto *aluInstructions = reinterpret_cast<MI_MATH_ALU_INST_INLINE *>(reservedSpace);

    EncodeMathMMIO<Family>::encodeAluAdd(aluInstructions, firstOperandRegister, secondOperandRegister, finalResultRegister);
}
template <typename Family>
void EncodeMath<Family>::bitwiseAnd(CommandContainer &container,
AluRegisters firstOperandRegister,

View File

@ -119,6 +119,7 @@ DECLARE_DEBUG_VARIABLE(int32_t, GpuScratchRegWriteRegisterData, 0, "register dat
DECLARE_DEBUG_VARIABLE(int32_t, OverrideSlmAllocationSize, -1, "-1: default, >=0: program value for shared local memory size")
DECLARE_DEBUG_VARIABLE(int32_t, DebuggerLogBitmask, 0, "0: logs disabled, 1 - INFO, 2 - ERROR, 1<<10 - Dump elf, see DebugVariables::DEBUGGER_LOG_BITMASK")
DECLARE_DEBUG_VARIABLE(int32_t, DebuggerOptDisable, -1, "-1: default from debugger query, 0: do not add opt-disable, 1: add opt-disable")
DECLARE_DEBUG_VARIABLE(int32_t, DebuggerForceSbaTrackingMode, -1, "-1: default, 0: per context address spaces, 1: single address space")
DECLARE_DEBUG_VARIABLE(int32_t, DebugApiUsed, 0, "0: default L0 Debug API not used, 1: L0 Debug API used")
DECLARE_DEBUG_VARIABLE(int32_t, OverrideCsrAllocationSize, -1, "-1: default, >0: use value for size of CSR allocation")
DECLARE_DEBUG_VARIABLE(int32_t, CFEComputeOverdispatchDisable, -1, "Set Compute Overdispatch Disable field in CFE_STATE, -1: do not set.")