mirror of
https://github.com/intel/compute-runtime.git
synced 2026-01-05 09:09:04 +08:00
fix: add missing sba capture when debug enabled
- In global bindless mode, SBA may be programmed for the scratch allocation; the missing SBA capture caused issues for kernels that use scratch. - This fix adds SBA capturing after the SBA command. Related-To: NEO-7063 Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
committed by
Compute-Runtime-Automation
parent
579af57161
commit
4c3a0d8344
@@ -8,6 +8,7 @@
|
||||
#include "shared/source/debugger/debugger_l0.h"
|
||||
#include "shared/source/gmm_helper/gmm_helper.h"
|
||||
#include "shared/source/helpers/array_count.h"
|
||||
#include "shared/source/helpers/bindless_heaps_helper.h"
|
||||
#include "shared/source/helpers/file_io.h"
|
||||
#include "shared/source/helpers/register_offsets.h"
|
||||
#include "shared/source/indirect_heap/indirect_heap.h"
|
||||
@@ -149,5 +150,125 @@ HWTEST2_F(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExe
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst);
|
||||
}
|
||||
|
||||
struct DebuggerSingleAddressSpaceGlobalBindlessAllocatorAubFixture : public DebuggerAubFixture {
|
||||
void setUp() {
|
||||
NEO::debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
|
||||
NEO::debugManager.flags.UseBindlessMode.set(1);
|
||||
DebuggerAubFixture::setUp();
|
||||
}
|
||||
void tearDown() {
|
||||
DebuggerAubFixture::tearDown();
|
||||
}
|
||||
};
|
||||
using DebuggerGlobalAllocatorAub = Test<DebuggerSingleAddressSpaceGlobalBindlessAllocatorAubFixture>;
|
||||
using PlatformsSupportingGlobalBindless = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE2_HPG_CORE>;
|
||||
|
||||
HWTEST2_F(DebuggerGlobalAllocatorAub, GivenKernelWithScratchWhenCmdListExecutedThenSbaAddressesAreTracked, PlatformsSupportingGlobalBindless) {
|
||||
|
||||
const uint32_t arraySize = 32;
|
||||
const uint32_t typeSize = sizeof(int);
|
||||
|
||||
uint32_t bufferSize = (arraySize * 2 + 1) * typeSize - 4;
|
||||
const uint32_t groupSize[] = {arraySize, 1, 1};
|
||||
const uint32_t groupCount[] = {1, 1, 1};
|
||||
|
||||
memoryManager = neoDevice->getMemoryManager();
|
||||
gmmHelper = neoDevice->getGmmHelper();
|
||||
rootDeviceIndex = neoDevice->getRootDeviceIndex();
|
||||
|
||||
NEO::debugManager.flags.UpdateCrossThreadDataSize.set(true);
|
||||
|
||||
ASSERT_NE(nullptr, neoDevice->getBindlessHeapsHelper());
|
||||
|
||||
NEO::SVMAllocsManager::UnifiedMemoryProperties unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory,
|
||||
1,
|
||||
context->rootDeviceIndices,
|
||||
context->deviceBitfields);
|
||||
|
||||
auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
|
||||
memset(bufferDst, 0, bufferSize);
|
||||
auto bufferSrc = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties);
|
||||
memset(bufferSrc, 0, bufferSize);
|
||||
auto bufferOffset = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(128 * arraySize, unifiedMemoryProperties);
|
||||
memset(bufferOffset, 0, 128 * arraySize);
|
||||
|
||||
int *srcBufferInt = static_cast<int *>(bufferSrc);
|
||||
std::unique_ptr<int[]> expectedMemoryInt = std::make_unique<int[]>(bufferSize / typeSize);
|
||||
const int expectedVal1 = 16256;
|
||||
const int expectedVal2 = 512;
|
||||
|
||||
for (uint32_t i = 0; i < arraySize; ++i) {
|
||||
srcBufferInt[i] = 2;
|
||||
expectedMemoryInt[i * 2] = expectedVal1;
|
||||
expectedMemoryInt[i * 2 + 1] = expectedVal2;
|
||||
}
|
||||
|
||||
auto simulatedCsr = AUBFixtureL0::getSimulatedCsr<FamilyType>();
|
||||
simulatedCsr->initializeEngine();
|
||||
|
||||
simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation());
|
||||
|
||||
ze_group_count_t dispatchTraits;
|
||||
dispatchTraits.groupCountX = groupCount[0];
|
||||
dispatchTraits.groupCountY = groupCount[1];
|
||||
dispatchTraits.groupCountZ = groupCount[2];
|
||||
|
||||
module = static_cast<L0::ModuleImp *>(Module::fromHandle(createModuleFromFile("simple_spill_fill_kernel", context, device, "", true)));
|
||||
|
||||
ze_kernel_handle_t kernel;
|
||||
ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC};
|
||||
kernelDesc.pKernelName = "spill_test";
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferSrc));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1, sizeof(void *), &bufferDst));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 2, sizeof(void *), &bufferOffset));
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2]));
|
||||
|
||||
ze_command_list_handle_t cmdListHandle = commandList->toHandle();
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr));
|
||||
commandList->close();
|
||||
|
||||
pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr);
|
||||
pCmdq->synchronize(std::numeric_limits<uint64_t>::max());
|
||||
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()),
|
||||
expectedMemoryInt.get(), bufferSize);
|
||||
|
||||
const auto sbaAddress = device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress();
|
||||
auto instructionHeapBaseAddress = memoryManager->getInternalHeapBaseAddress(rootDeviceIndex,
|
||||
memoryManager->isLocalMemoryUsedForIsa(rootDeviceIndex));
|
||||
instructionHeapBaseAddress = gmmHelper->canonize(instructionHeapBaseAddress);
|
||||
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(NEO::SbaTrackedAddresses, instructionBaseAddress)),
|
||||
&instructionHeapBaseAddress, sizeof(instructionHeapBaseAddress));
|
||||
|
||||
auto commandListSurfaceHeapAllocation = commandList->commandContainer.getIndirectHeap(HeapType::surfaceState);
|
||||
|
||||
auto surfaceStateBaseAddress = commandListSurfaceHeapAllocation->getGraphicsAllocation()->getGpuAddress();
|
||||
surfaceStateBaseAddress = gmmHelper->canonize(surfaceStateBaseAddress);
|
||||
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(NEO::SbaTrackedAddresses, surfaceStateBaseAddress)),
|
||||
&surfaceStateBaseAddress, sizeof(surfaceStateBaseAddress));
|
||||
|
||||
auto bindlessSurfaceStateBaseAddress = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase();
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(NEO::SbaTrackedAddresses, bindlessSurfaceStateBaseAddress)),
|
||||
&bindlessSurfaceStateBaseAddress, sizeof(bindlessSurfaceStateBaseAddress));
|
||||
|
||||
auto commandListDynamicHeapAllocation = commandList->commandContainer.getIndirectHeap(HeapType::dynamicState);
|
||||
if (commandListDynamicHeapAllocation) {
|
||||
auto dynamicStateBaseAddress = commandListDynamicHeapAllocation->getGraphicsAllocation()->getGpuAddress();
|
||||
dynamicStateBaseAddress = gmmHelper->canonize(dynamicStateBaseAddress);
|
||||
|
||||
expectMemory<FamilyType>(reinterpret_cast<void *>(sbaAddress + offsetof(NEO::SbaTrackedAddresses, dynamicStateBaseAddress)),
|
||||
&bindlessSurfaceStateBaseAddress, sizeof(bindlessSurfaceStateBaseAddress));
|
||||
}
|
||||
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel));
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst);
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferSrc);
|
||||
driverHandle->svmAllocsManager->freeSVMAlloc(bufferOffset);
|
||||
}
|
||||
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -105,10 +105,14 @@ void AUBFixtureL0::tearDown() {
|
||||
pCmdq->destroy();
|
||||
}
|
||||
|
||||
ze_module_handle_t AUBFixtureL0::createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags) {
|
||||
ze_module_handle_t AUBFixtureL0::createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags, bool useSharedFile) {
|
||||
ze_module_handle_t moduleHandle;
|
||||
std::string testFile;
|
||||
retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin");
|
||||
if (useSharedFile) {
|
||||
retrieveBinaryKernelFilename(testFile, fileName + "_", ".bin");
|
||||
} else {
|
||||
retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin");
|
||||
}
|
||||
|
||||
size_t size = 0;
|
||||
auto src = loadDataFromFile(testFile.c_str(), size);
|
||||
@@ -129,4 +133,5 @@ ze_module_handle_t AUBFixtureL0::createModuleFromFile(const std::string &fileNam
|
||||
EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleCreate(context, device, &moduleDesc, &moduleHandle, nullptr));
|
||||
return moduleHandle;
|
||||
}
|
||||
|
||||
} // namespace L0
|
||||
|
||||
@@ -101,7 +101,7 @@ class AUBFixtureL0 {
|
||||
}
|
||||
}
|
||||
|
||||
static ze_module_handle_t createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags);
|
||||
static ze_module_handle_t createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags, bool useSharedFile = false);
|
||||
|
||||
std::string aubFileName;
|
||||
std::unique_ptr<VariableBackup<NEO::UltHwConfig>> backupUltConfig;
|
||||
|
||||
@@ -10,5 +10,7 @@
|
||||
namespace L0 {
|
||||
namespace ult {
|
||||
HWTEST_EXCLUDE_PRODUCT(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExecutedThenSbaAddressesAreTracked_PlatformsSupportingSingleAddressSpace, IGFX_XE_HPG_CORE);
|
||||
}
|
||||
|
||||
HWTEST_EXCLUDE_PRODUCT(DebuggerGlobalAllocatorAub, GivenKernelWithScratchWhenCmdListExecutedThenSbaAddressesAreTracked_PlatformsSupportingGlobalBindless, IGFX_XE_HPG_CORE);
|
||||
} // namespace ult
|
||||
} // namespace L0
|
||||
|
||||
@@ -111,6 +111,19 @@ struct L0DebuggerPerContextAddressSpaceFixture : public L0DebuggerHwFixture {
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
struct L0DebuggerPerContextAddressSpaceGlobalBindlessFixture : public L0DebuggerHwFixture {
|
||||
void setUp() {
|
||||
NEO::debugManager.flags.DebuggerForceSbaTrackingMode.set(0);
|
||||
NEO::debugManager.flags.UseBindlessMode.set(1);
|
||||
NEO::debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1);
|
||||
L0DebuggerHwFixture::setUp();
|
||||
}
|
||||
void tearDown() {
|
||||
L0DebuggerHwFixture::tearDown();
|
||||
}
|
||||
DebugManagerStateRestore restorer;
|
||||
};
|
||||
|
||||
struct L0DebuggerSingleAddressSpaceFixture : public L0DebuggerHwFixture {
|
||||
void setUp() {
|
||||
NEO::debugManager.flags.DebuggerForceSbaTrackingMode.set(1);
|
||||
|
||||
@@ -17,6 +17,8 @@
|
||||
#include "level_zero/core/source/cmdlist/cmdlist.h"
|
||||
#include "level_zero/core/test/unit_tests/fixtures/device_fixture.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
|
||||
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
|
||||
#include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h"
|
||||
|
||||
namespace L0 {
|
||||
@@ -179,6 +181,90 @@ HWTEST2_F(L0DebuggerPerContextAddressSpaceTest, givenDebuggingEnabledAndRequired
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
using L0DebuggerPerContextAddressSpaceGlobalBindlessTest = Test<L0DebuggerPerContextAddressSpaceGlobalBindlessFixture>;
|
||||
using PlatformsSupportingGlobalBindless = IsWithinGfxCore<IGFX_XE_HP_CORE, IGFX_XE2_HPG_CORE>;
|
||||
|
||||
HWTEST2_F(L0DebuggerPerContextAddressSpaceGlobalBindlessTest, givenDebuggingEnabledAndRequiredSshWhenCommandListIsExecutedThenProgramSsbaWritesToSbaTrackingBuffer, PlatformsSupportingGlobalBindless) {
|
||||
using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM;
|
||||
using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS;
|
||||
|
||||
ze_command_queue_desc_t queueDesc = {};
|
||||
ze_result_t returnValue;
|
||||
auto cmdQ = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, false, returnValue);
|
||||
ASSERT_NE(nullptr, cmdQ);
|
||||
|
||||
auto commandQueue = whiteboxCast(cmdQ);
|
||||
auto usedSpaceBefore = commandQueue->commandStream.getUsed();
|
||||
|
||||
auto commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false);
|
||||
ze_command_list_handle_t commandLists[] = {commandList->toHandle()};
|
||||
|
||||
Mock<Module> module(device, nullptr, ModuleType::user);
|
||||
Mock<KernelImp> kernel;
|
||||
kernel.module = &module;
|
||||
ze_group_count_t groupCount{1, 1, 1};
|
||||
|
||||
kernel.descriptor.kernelAttributes.perThreadScratchSize[0] = 0x40;
|
||||
|
||||
CmdListKernelLaunchParams launchParams = {};
|
||||
auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
CommandList::fromHandle(commandLists[0])->close();
|
||||
|
||||
uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]);
|
||||
|
||||
result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true, nullptr);
|
||||
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
|
||||
|
||||
auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed();
|
||||
ASSERT_GT(usedSpaceAfter, usedSpaceBefore);
|
||||
|
||||
GenCmdList cmdList;
|
||||
ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer(
|
||||
cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), usedSpaceAfter));
|
||||
|
||||
auto sbaItors = findAll<STATE_BASE_ADDRESS *>(cmdList.begin(), cmdList.end());
|
||||
ASSERT_NE(0u, sbaItors.size());
|
||||
|
||||
auto sbaItor = sbaItors[sbaItors.size() - 1];
|
||||
|
||||
ASSERT_NE(cmdList.end(), sbaItor);
|
||||
auto cmdSba = genCmdCast<STATE_BASE_ADDRESS *>(*sbaItor);
|
||||
|
||||
auto sdiItors = findAll<MI_STORE_DATA_IMM *>(sbaItor, cmdList.end());
|
||||
ASSERT_NE(0u, sdiItors.size());
|
||||
|
||||
auto cmdSdi = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItors[0]);
|
||||
|
||||
auto gmmHelper = neoDevice->getGmmHelper();
|
||||
auto expectedSshGpuVa = commandList->getCmdContainer().getIndirectHeap(HeapType::surfaceState)->getGpuBase();
|
||||
|
||||
for (size_t i = 0; i < sdiItors.size(); i++) {
|
||||
cmdSdi = genCmdCast<MI_STORE_DATA_IMM *>(*sdiItors[i]);
|
||||
uint64_t address = cmdSdi->getDataDword1();
|
||||
address <<= 32;
|
||||
address = address | cmdSdi->getDataDword0();
|
||||
if (expectedSshGpuVa == address) {
|
||||
break;
|
||||
}
|
||||
cmdSdi = nullptr;
|
||||
}
|
||||
|
||||
ASSERT_NE(nullptr, cmdSdi);
|
||||
uint64_t ssbaGpuVa = gmmHelper->canonize(cmdSba->getSurfaceStateBaseAddress());
|
||||
EXPECT_EQ(static_cast<uint32_t>(ssbaGpuVa & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0());
|
||||
EXPECT_EQ(static_cast<uint32_t>(ssbaGpuVa >> 32), cmdSdi->getDataDword1());
|
||||
|
||||
auto expectedGpuVa = gmmHelper->decanonize(device->getL0Debugger()->getSbaTrackingGpuVa()) + offsetof(NEO::SbaTrackedAddresses, surfaceStateBaseAddress);
|
||||
EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress());
|
||||
|
||||
for (auto i = 0u; i < numCommandLists; i++) {
|
||||
auto commandList = CommandList::fromHandle(commandLists[i]);
|
||||
commandList->destroy();
|
||||
}
|
||||
commandQueue->destroy();
|
||||
}
|
||||
|
||||
HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesArePrinted, Gen12Plus) {
|
||||
|
||||
auto &compilerProductHelper = neoDevice->getCompilerProductHelper();
|
||||
|
||||
Reference in New Issue
Block a user