From c9d61840a2bb1a95a9ea3f50a297355e79a3f17f Mon Sep 17 00:00:00 2001 From: Mateusz Hoppe Date: Thu, 14 Apr 2022 19:13:34 +0000 Subject: [PATCH] Allocate SBA buffers per HW context - different physical storage for every HW context - adds support for debugging with implicit scaling on - reorganize tests Relates-To: NEO-6883 Signed-off-by: Mateusz Hoppe --- .../core/source/debugger/debugger_l0.cpp | 7 +- .../sources/debugger/CMakeLists.txt | 1 + .../sources/debugger/test_l0_debugger_1.cpp | 524 -------------- .../test_l0_debugger_sba_tracking.cpp | 641 ++++++++++++++++++ .../linux/drm_memory_manager_tests.cpp | 88 +++ .../definitions/storage_info.cpp | 1 + .../memory_manager/graphics_allocation.h | 1 + .../source/memory_manager/memory_manager.cpp | 6 +- .../os_agnostic_memory_manager.cpp | 6 +- .../os_interface/linux/drm_memory_manager.cpp | 10 +- ...y_manager_create_multi_host_allocation.cpp | 16 +- 11 files changed, 765 insertions(+), 536 deletions(-) create mode 100644 level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp diff --git a/level_zero/core/source/debugger/debugger_l0.cpp b/level_zero/core/source/debugger/debugger_l0.cpp index bdb6ee7c09..51179cec4f 100644 --- a/level_zero/core/source/debugger/debugger_l0.cpp +++ b/level_zero/core/source/debugger/debugger_l0.cpp @@ -52,11 +52,10 @@ void DebuggerL0::initialize() { if (!singleAddressSpaceSbaTracking) { properties.osContext = engine.osContext; } - auto sbaAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); - memset(sbaAllocation->getUnderlyingBuffer(), 0, sbaAllocation->getUnderlyingBufferSize()); + properties.subDevicesBitfield = engine.osContext->getDeviceBitfield(); - auto sbaHeaderPtr = reinterpret_cast(sbaAllocation->getUnderlyingBuffer()); - *sbaHeaderPtr = sbaHeader; + auto sbaAllocation = device->getMemoryManager()->allocateGraphicsMemoryWithProperties(properties); + device->getMemoryManager()->copyMemoryToAllocation(sbaAllocation, 0, &sbaHeader, sizeof(sbaHeader)); perContextSbaAllocations[engine.osContext->getContextId()] = sbaAllocation; } diff --git a/level_zero/core/test/unit_tests/sources/debugger/CMakeLists.txt b/level_zero/core/test/unit_tests/sources/debugger/CMakeLists.txt index fc95bbdeec..e44c08d65f 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/CMakeLists.txt +++ b/level_zero/core/test/unit_tests/sources/debugger/CMakeLists.txt @@ -11,6 +11,7 @@ target_sources(${TARGET_NAME} PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/test_source_level_debugger.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_1.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_2.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_sba_tracking.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_l0_debugger_single_address_space.cpp ${CMAKE_CURRENT_SOURCE_DIR}/test_module_with_debug.cpp ) diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp index 4707c3bb82..97693646d1 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_1.cpp @@ -129,53 +129,6 @@ TEST(Debugger, givenDebuggingEnabledInExecEnvWhenAllocatingIsaThenSingleBankIsUs neoDevice->getMemoryManager()->freeGraphicsMemory(allocation); } -HWTEST_F(L0DebuggerTest, givenL0DebuggerWhenCreatedThenPerContextSbaTrackingBuffersAreAllocated) { - auto debugger = device->getL0Debugger(); - ASSERT_NE(nullptr, debugger); - - EXPECT_NE(0u, debugger->getSbaTrackingGpuVa()); - std::vector allocations; - - auto &allEngines = device->getNEODevice()->getMemoryManager()->getRegisteredEngines(); - - for (auto &engine : allEngines) { - auto sbaAllocation = debugger->getSbaTrackingBuffer(engine.osContext->getContextId()); - ASSERT_NE(nullptr, sbaAllocation); - allocations.push_back(sbaAllocation); - - EXPECT_EQ(NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, sbaAllocation->getAllocationType()); - EXPECT_EQ(MemoryPool::System4KBPages, sbaAllocation->getMemoryPool()); - } - - for (uint32_t i = 0; i < allocations.size() - 1; i++) { - EXPECT_NE(allocations[i], allocations[i + 1]); - } - - EXPECT_EQ(allEngines.size(), getMockDebuggerL0Hw()->perContextSbaAllocations.size()); -} - -HWTEST_F(L0DebuggerTest, givenCreatedL0DebuggerThenSbaTrackingBuffersContainValidHeader) { - auto debugger = device->getL0Debugger(); - ASSERT_NE(nullptr, debugger); - - for (auto &sbaBuffer : getMockDebuggerL0Hw()->perContextSbaAllocations) { - auto sbaAllocation = sbaBuffer.second; - ASSERT_NE(nullptr, sbaAllocation); - - auto sbaHeader = reinterpret_cast(sbaAllocation->getUnderlyingBuffer()); - - EXPECT_STREQ("sbaarea", sbaHeader->magic); - EXPECT_EQ(0u, sbaHeader->BindlessSamplerStateBaseAddress); - EXPECT_EQ(0u, sbaHeader->BindlessSurfaceStateBaseAddress); - EXPECT_EQ(0u, sbaHeader->DynamicStateBaseAddress); - EXPECT_EQ(0u, sbaHeader->GeneralStateBaseAddress); - EXPECT_EQ(0u, sbaHeader->IndirectObjectBaseAddress); - EXPECT_EQ(0u, sbaHeader->InstructionBaseAddress); - EXPECT_EQ(0u, sbaHeader->SurfaceStateBaseAddress); - EXPECT_EQ(0u, sbaHeader->Version); - } -} - HWTEST_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenValidKernelDebugCommandsAreAdded) { using MI_LOAD_REGISTER_IMM = typename FamilyType::MI_LOAD_REGISTER_IMM; using STATE_SIP = typename FamilyType::STATE_SIP; @@ -242,192 +195,8 @@ HWTEST_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenValid commandQueue->destroy(); } -using NotGen8Or11 = AreNotGfxCores; - -HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledAndRequiredGsbaWhenCommandListIsExecutedThenProgramGsbaWritesToSbaTrackingBuffer, NotGen8Or11) { - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; - - ze_command_queue_desc_t queueDesc = {}; - ze_result_t returnValue; - auto cmdQ = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue); - ASSERT_NE(nullptr, cmdQ); - - auto commandQueue = whitebox_cast(cmdQ); - auto cmdQHw = static_cast *>(cmdQ); - - if (cmdQHw->estimateStateBaseAddressCmdSize() == 0) { - commandQueue->destroy(); - GTEST_SKIP(); - } - - auto usedSpaceBefore = commandQueue->commandStream->getUsed(); - - ze_command_list_handle_t commandLists[] = { - CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; - CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(4096); - - uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); - - auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); - ASSERT_EQ(ZE_RESULT_SUCCESS, result); - - auto usedSpaceAfter = commandQueue->commandStream->getUsed(); - ASSERT_GT(usedSpaceAfter, usedSpaceBefore); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); - - auto sbaItor = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), sbaItor); - auto cmdSba = genCmdCast(*sbaItor); - - auto sdiItor = find(sbaItor, cmdList.end()); - ASSERT_NE(cmdList.end(), sdiItor); - auto cmdSdi = genCmdCast(*sdiItor); - - uint64_t gsbaGpuVa = cmdSba->getGeneralStateBaseAddress(); - EXPECT_EQ(static_cast(gsbaGpuVa & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(gsbaGpuVa >> 32), cmdSdi->getDataDword1()); - - auto expectedGpuVa = GmmHelper::decanonize(device->getL0Debugger()->getSbaTrackingGpuVa()) + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - - for (auto i = 0u; i < numCommandLists; i++) { - auto commandList = CommandList::fromHandle(commandLists[i]); - commandList->destroy(); - } - commandQueue->destroy(); -} - -HWTEST_F(L0DebuggerTest, givenDebuggingEnabledAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesArePrinted) { - DebugManagerStateRestore restorer; - NEO::DebugManager.flags.DebuggerLogBitmask.set(255); - - testing::internal::CaptureStdout(); - - ze_command_queue_desc_t queueDesc = {}; - ze_result_t returnValue; - auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); - ASSERT_NE(nullptr, commandQueue->commandStream); - - ze_command_list_handle_t commandLists[] = { - CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; - const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); - - auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - commandQueue->synchronize(0); - - std::string output = testing::internal::GetCapturedStdout(); - size_t pos = output.find("INFO: Debugger: SBA stored ssh"); - EXPECT_NE(std::string::npos, pos); - - pos = output.find("Debugger: SBA ssh"); - EXPECT_NE(std::string::npos, pos); - - auto commandList = CommandList::fromHandle(commandLists[0]); - commandList->destroy(); - - commandQueue->destroy(); -} - using L0DebuggerSimpleTest = Test; -HWTEST_F(L0DebuggerSimpleTest, givenNullL0DebuggerAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesAreNotPrinted) { - DebugManagerStateRestore restorer; - NEO::DebugManager.flags.DebuggerLogBitmask.set(255); - - EXPECT_EQ(nullptr, device->getL0Debugger()); - testing::internal::CaptureStdout(); - - ze_command_queue_desc_t queueDesc = {}; - ze_result_t returnValue; - auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); - ASSERT_NE(nullptr, commandQueue->commandStream); - - ze_command_list_handle_t commandLists[] = { - CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; - const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); - - auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - commandQueue->synchronize(0); - - std::string output = testing::internal::GetCapturedStdout(); - size_t pos = output.find("Debugger: SBA"); - EXPECT_EQ(std::string::npos, pos); - - auto commandList = CommandList::fromHandle(commandLists[0]); - commandList->destroy(); - - commandQueue->destroy(); -} - -HWTEST_F(L0DebuggerTest, givenL0DebuggerAndDebuggerLogsDisabledWhenCommandQueueIsSynchronizedThenSbaAddressesAreNotPrinted) { - DebugManagerStateRestore restorer; - NEO::DebugManager.flags.DebuggerLogBitmask.set(0); - - EXPECT_NE(nullptr, device->getL0Debugger()); - testing::internal::CaptureStdout(); - - ze_command_queue_desc_t queueDesc = {}; - ze_result_t returnValue; - auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); - ASSERT_NE(nullptr, commandQueue->commandStream); - - ze_command_list_handle_t commandLists[] = { - CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; - const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); - - auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - commandQueue->synchronize(0); - - std::string output = testing::internal::GetCapturedStdout(); - size_t pos = output.find("Debugger: SBA"); - EXPECT_EQ(std::string::npos, pos); - - auto commandList = CommandList::fromHandle(commandLists[0]); - commandList->destroy(); - - commandQueue->destroy(); -} - -HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenNonCopyCommandListIsInititalizedOrResetThenSSHAddressIsTracked, NotGen8Or11) { - using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; - - size_t usedSpaceBefore = 0; - ze_result_t returnValue; - ze_command_list_handle_t commandListHandle = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); - auto commandList = CommandList::fromHandle(commandListHandle); - - auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); - ASSERT_GT(usedSpaceAfter, usedSpaceBefore); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( - cmdList, commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceAfter)); - - auto sbaItor = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), sbaItor); - auto cmdSba = genCmdCast(*sbaItor); - - uint64_t sshGpuVa = cmdSba->getSurfaceStateBaseAddress(); - auto expectedGpuVa = commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getHeapGpuBase(); - EXPECT_EQ(expectedGpuVa, sshGpuVa); - EXPECT_EQ(1u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); - - commandList->reset(); - EXPECT_EQ(2u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); - - commandList->destroy(); -} - HWTEST_F(L0DebuggerTest, givenDebuggerWhenAppendingKernelToCommandListThenBindlessSurfaceStateForDebugSurfaceIsProgrammedAtOffsetZero) { using RENDER_SURFACE_STATE = typename FamilyType::RENDER_SURFACE_STATE; @@ -478,34 +247,6 @@ HWTEST_F(L0DebuggerTest, givenDebuggerWhenAppendingKernelToCommandListThenDebugS EXPECT_EQ(actualMocs, mocsNoCache); } -HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenSbaBufferIsPushedToResidencyContainer, IsAtLeastSkl) { - ze_command_queue_desc_t queueDesc = {}; - - std::unique_ptr, Deleter> commandQueue(new MockCommandQueueHw(device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc)); - commandQueue->initialize(false, false); - - ze_result_t returnValue; - ze_command_list_handle_t commandLists[] = { - CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; - uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); - - auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); - EXPECT_EQ(ZE_RESULT_SUCCESS, result); - - auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(neoDevice->getDefaultEngine().commandStreamReceiver->getOsContext().getContextId()); - bool sbaFound = false; - - for (auto iter : commandQueue->residencyContainerSnapshot) { - if (iter == sbaBuffer) { - sbaFound = true; - } - } - EXPECT_TRUE(sbaFound); - - auto commandList = CommandList::fromHandle(commandLists[0]); - commandList->destroy(); -} - HWTEST_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionEnabledWithImmediateCommandListToInvokeNonKernelOperationsThenSuccessIsReturned) { DebugManagerStateRestore restorer; NEO::DebugManager.flags.EnableFlushTaskSubmission.set(true); @@ -815,277 +556,12 @@ HWTEST2_F(L0DebuggerSimpleTest, givenUseCsrImmediateSubmissionDisabledCommandLis commandList->destroy(); } -HWTEST_F(L0DebuggerSimpleTest, givenNonZeroGpuVasWhenProgrammingSbaTrackingThenCorrectCmdsAreAddedToStream) { - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - auto debugger = std::make_unique>(neoDevice); - - debugger->sbaTrackingGpuVa.address = 0x45670000; - auto expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); - - StackVec buffer(4096); - NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); - uint64_t gsba = 0x60000; - uint64_t ssba = 0x1234567000; - uint64_t iba = 0xfff80000; - uint64_t ioba = 0x8100000; - uint64_t dsba = 0xffff0000aaaa0000; - - NEO::Debugger::SbaAddresses sbaAddresses = {}; - sbaAddresses.GeneralStateBaseAddress = gsba; - sbaAddresses.SurfaceStateBaseAddress = ssba; - sbaAddresses.InstructionBaseAddress = iba; - sbaAddresses.IndirectObjectBaseAddress = ioba; - sbaAddresses.DynamicStateBaseAddress = dsba; - sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; - - debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); - - EXPECT_EQ(6 * sizeof(MI_STORE_DATA_IMM), cmdStream.getUsed()); - - auto sdiItor = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), sdiItor); - auto cmdSdi = genCmdCast(*sdiItor); - - EXPECT_EQ(static_cast(gsba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(gsba >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress); - EXPECT_EQ(static_cast(ssba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(ssba >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress); - - auto decanonizedAddress = GmmHelper::decanonize(dsba); - EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress); - EXPECT_EQ(static_cast(ioba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(ioba >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, InstructionBaseAddress); - EXPECT_EQ(static_cast(iba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(iba >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress); - EXPECT_EQ(static_cast(ssba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(ssba >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); -} - -HWTEST_F(L0DebuggerSimpleTest, givenCanonizedGpuVasWhenProgrammingSbaTrackingThenNonCanonicalAddressesAreStored) { - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - auto debugger = std::make_unique>(neoDevice); - - debugger->sbaTrackingGpuVa.address = 0x45670000; - auto expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); - - StackVec buffer(4096); - NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); - uint64_t gsba = 0xffff800000060000; - uint64_t ssba = 0xffff801234567000; - uint64_t iba = 0xffff8000fff80000; - uint64_t ioba = 0xffff800008100000; - uint64_t dsba = 0xffff8000aaaa0000; - - NEO::Debugger::SbaAddresses sbaAddresses = {}; - sbaAddresses.GeneralStateBaseAddress = gsba; - sbaAddresses.SurfaceStateBaseAddress = ssba; - sbaAddresses.InstructionBaseAddress = iba; - sbaAddresses.IndirectObjectBaseAddress = ioba; - sbaAddresses.DynamicStateBaseAddress = dsba; - sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; - - debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); - - GenCmdList cmdList; - ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); - - EXPECT_EQ(6 * sizeof(MI_STORE_DATA_IMM), cmdStream.getUsed()); - - auto sdiItor = find(cmdList.begin(), cmdList.end()); - ASSERT_NE(cmdList.end(), sdiItor); - auto cmdSdi = genCmdCast(*sdiItor); - - auto decanonizedAddress = GmmHelper::decanonize(gsba); - EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress); - decanonizedAddress = GmmHelper::decanonize(ssba); - EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress); - decanonizedAddress = GmmHelper::decanonize(dsba); - EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress); - decanonizedAddress = GmmHelper::decanonize(ioba); - EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, InstructionBaseAddress); - decanonizedAddress = GmmHelper::decanonize(iba); - EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); - - sdiItor++; - cmdSdi = genCmdCast(*sdiItor); - - expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress); - decanonizedAddress = GmmHelper::decanonize(ssba); - EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); - EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); - EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); - EXPECT_TRUE(cmdSdi->getStoreQword()); -} - -HWTEST_F(L0DebuggerSimpleTest, givenZeroGpuVasWhenProgrammingSbaTrackingThenStreamIsNotUsed) { - using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; - auto debugger = std::make_unique>(neoDevice); - - debugger->sbaTrackingGpuVa.address = 0x45670000; - - StackVec buffer(4096); - NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); - uint64_t gsba = 0; - uint64_t ssba = 0; - - NEO::Debugger::SbaAddresses sbaAddresses = {}; - sbaAddresses.GeneralStateBaseAddress = gsba; - sbaAddresses.SurfaceStateBaseAddress = ssba; - - debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); - - EXPECT_EQ(0u, cmdStream.getUsed()); -} - HWTEST_F(L0DebuggerSimpleTest, whenAllocateCalledThenDebuggerIsCreated) { auto debugger = DebuggerL0Hw::allocate(neoDevice); EXPECT_NE(nullptr, debugger); delete debugger; } -HWTEST_F(L0DebuggerSimpleTest, givenNotChangedSurfaceStateWhenCapturingSBAThenNoTrackingCmdsAreAdded) { - auto debugger = std::make_unique>(neoDevice); - - debugger->sbaTrackingGpuVa.address = 0x45670000; - - NEO::CommandContainer container; - container.initialize(neoDevice, nullptr, true); - - NEO::Debugger::SbaAddresses sba = {}; - sba.SurfaceStateBaseAddress = 0x123456000; - - debugger->captureStateBaseAddress(container, sba); - auto sizeUsed = container.getCommandStream()->getUsed(); - - EXPECT_NE(0u, sizeUsed); - sba.SurfaceStateBaseAddress = 0; - - debugger->captureStateBaseAddress(container, sba); - auto sizeUsed2 = container.getCommandStream()->getUsed(); - - EXPECT_EQ(sizeUsed, sizeUsed2); -} - -HWTEST_F(L0DebuggerSimpleTest, givenChangedBaseAddressesWhenCapturingSBAThenNoTrackingCmdsAreAdded) { - auto debugger = std::make_unique>(neoDevice); - - debugger->sbaTrackingGpuVa.address = 0x45670000; - { - NEO::CommandContainer container; - container.initialize(neoDevice, nullptr, true); - - NEO::Debugger::SbaAddresses sba = {}; - sba.SurfaceStateBaseAddress = 0x123456000; - - debugger->captureStateBaseAddress(container, sba); - auto sizeUsed = container.getCommandStream()->getUsed(); - - EXPECT_NE(0u, sizeUsed); - } - - { - NEO::CommandContainer container; - container.initialize(neoDevice, nullptr, true); - - NEO::Debugger::SbaAddresses sba = {}; - sba.GeneralStateBaseAddress = 0x123456000; - - debugger->captureStateBaseAddress(container, sba); - auto sizeUsed = container.getCommandStream()->getUsed(); - - EXPECT_NE(0u, sizeUsed); - } - - { - NEO::CommandContainer container; - container.initialize(neoDevice, nullptr, true); - - NEO::Debugger::SbaAddresses sba = {}; - sba.BindlessSurfaceStateBaseAddress = 0x123456000; - - debugger->captureStateBaseAddress(container, sba); - auto sizeUsed = container.getCommandStream()->getUsed(); - - EXPECT_NE(0u, sizeUsed); - } -} - HWTEST_F(L0DebuggerSimpleTest, givenDebuggerWithoutMemoryOperationsHandlerWhenNotifyingModuleAllocationsThenNoAllocationIsResident) { auto debugger = std::make_unique>(neoDevice); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp new file mode 100644 index 0000000000..ecf0bb9e08 --- /dev/null +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp @@ -0,0 +1,641 @@ +/* + * Copyright (C) 2022 Intel Corporation + * + * SPDX-License-Identifier: MIT + * + */ + +#include "shared/source/gen_common/reg_configs_common.h" +#include "shared/source/helpers/preamble.h" +#include "shared/test/common/cmd_parse/gen_cmd_parse.h" +#include "shared/test/common/mocks/mock_gmm_helper.h" +#include "shared/test/common/test_macros/test.h" + +#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" +#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" +#include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" + +namespace L0 { +namespace ult { + +using L0DebuggerTest = Test; + +HWTEST_F(L0DebuggerTest, givenL0DebuggerWhenCreatedThenPerContextSbaTrackingBuffersAreAllocated) { + auto debugger = device->getL0Debugger(); + ASSERT_NE(nullptr, debugger); + + EXPECT_NE(0u, debugger->getSbaTrackingGpuVa()); + std::vector allocations; + + auto &allEngines = device->getNEODevice()->getMemoryManager()->getRegisteredEngines(); + + for (auto &engine : allEngines) { + auto sbaAllocation = debugger->getSbaTrackingBuffer(engine.osContext->getContextId()); + ASSERT_NE(nullptr, sbaAllocation); + allocations.push_back(sbaAllocation); + + EXPECT_EQ(NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, sbaAllocation->getAllocationType()); + EXPECT_EQ(MemoryPool::System4KBPages, sbaAllocation->getMemoryPool()); + } + + for (uint32_t i = 0; i < allocations.size() - 1; i++) { + EXPECT_NE(allocations[i], allocations[i + 1]); + } + + EXPECT_EQ(allEngines.size(), getMockDebuggerL0Hw()->perContextSbaAllocations.size()); +} + +HWTEST_F(L0DebuggerTest, givenCreatedL0DebuggerThenSbaTrackingBuffersContainValidHeader) { + auto debugger = device->getL0Debugger(); + ASSERT_NE(nullptr, debugger); + + for (auto &sbaBuffer : getMockDebuggerL0Hw()->perContextSbaAllocations) { + auto sbaAllocation = sbaBuffer.second; + ASSERT_NE(nullptr, sbaAllocation); + + auto sbaHeader = reinterpret_cast(sbaAllocation->getUnderlyingBuffer()); + + EXPECT_STREQ("sbaarea", sbaHeader->magic); + EXPECT_EQ(0u, sbaHeader->BindlessSamplerStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->BindlessSurfaceStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->DynamicStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->GeneralStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->IndirectObjectBaseAddress); + EXPECT_EQ(0u, sbaHeader->InstructionBaseAddress); + EXPECT_EQ(0u, sbaHeader->SurfaceStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->Version); + } +} + +HWTEST_F(L0DebuggerTest, givenL0DebuggerWhenCreatedThenPerContextSbaTrackingBuffersAreAllocatedWithProperStorageInfo) { + auto debugger = device->getL0Debugger(); + ASSERT_NE(nullptr, debugger); + + EXPECT_NE(0u, debugger->getSbaTrackingGpuVa()); + std::vector allocations; + + for (auto &engine : device->getNEODevice()->getAllEngines()) { + auto sbaAllocation = debugger->getSbaTrackingBuffer(engine.osContext->getContextId()); + ASSERT_NE(nullptr, sbaAllocation); + allocations.push_back(sbaAllocation); + + EXPECT_EQ(NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, sbaAllocation->getAllocationType()); + if (sbaAllocation->isAllocatedInLocalMemoryPool()) { + EXPECT_EQ(neoDevice->getDeviceBitfield(), sbaAllocation->storageInfo.pageTablesVisibility); + } + EXPECT_FALSE(sbaAllocation->storageInfo.cloningOfPageTables); + EXPECT_FALSE(sbaAllocation->storageInfo.multiStorage); + EXPECT_FALSE(sbaAllocation->storageInfo.tileInstanced); + EXPECT_GE(neoDevice->getDeviceBitfield().to_ullong(), sbaAllocation->storageInfo.getMemoryBanks()); + EXPECT_EQ(1u, sbaAllocation->storageInfo.getNumBanks()); + } +} + +using L0DebuggerMultiSubDeviceTest = Test; + +HWTEST_F(L0DebuggerMultiSubDeviceTest, givenMultiSubDevicesWhenSbaTrackingBuffersAllocatedThenThereIsSeparatePhysicalStorageForEveryContext) { + auto debugger = std::make_unique>(neoDevice); + + const auto &engines = neoDevice->getAllEngines(); + EXPECT_LE(1u, engines.size()); + + for (auto &engine : engines) { + + auto contextId = engine.osContext->getContextId(); + const auto &storageInfo = debugger->perContextSbaAllocations[contextId]->storageInfo; + + EXPECT_FALSE(storageInfo.cloningOfPageTables); + EXPECT_EQ(DeviceBitfield{maxNBitValue(numSubDevices)}, storageInfo.memoryBanks); + EXPECT_EQ(DeviceBitfield{maxNBitValue(numSubDevices)}, storageInfo.pageTablesVisibility); + EXPECT_EQ(engine.osContext->getDeviceBitfield().to_ulong(), storageInfo.memoryBanks.to_ulong()); + EXPECT_TRUE(storageInfo.tileInstanced); + + for (uint32_t i = 0; i < numSubDevices; i++) { + auto sbaHeader = reinterpret_cast(ptrOffset(debugger->perContextSbaAllocations[contextId]->getUnderlyingBuffer(), + debugger->perContextSbaAllocations[contextId]->getUnderlyingBufferSize() * i)); + + EXPECT_STREQ("sbaarea", sbaHeader->magic); + EXPECT_EQ(0u, sbaHeader->BindlessSamplerStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->BindlessSurfaceStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->DynamicStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->GeneralStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->IndirectObjectBaseAddress); + EXPECT_EQ(0u, sbaHeader->InstructionBaseAddress); + EXPECT_EQ(0u, sbaHeader->SurfaceStateBaseAddress); + EXPECT_EQ(0u, sbaHeader->Version); + } + if (!debugger->singleAddressSpaceSbaTracking) { + EXPECT_EQ(debugger->sbaTrackingGpuVa.address, debugger->perContextSbaAllocations[contextId]->getGpuAddress()); + } else { + EXPECT_NE(debugger->sbaTrackingGpuVa.address, debugger->perContextSbaAllocations[contextId]->getGpuAddress()); + } + } + + const auto &subDevice0Engines = neoDevice->getSubDevice(0)->getAllEngines(); + const auto &subDevice1Engines = neoDevice->getSubDevice(1)->getAllEngines(); + + auto subDeviceEngineSets = {subDevice0Engines, subDevice1Engines}; + uint64_t subDeviceIndex = 0; + for (const auto &subDeviceEngines : subDeviceEngineSets) { + for (auto &engine : subDeviceEngines) { + + auto contextId = engine.osContext->getContextId(); + const auto &storageInfo = debugger->perContextSbaAllocations[contextId]->storageInfo; + + EXPECT_FALSE(storageInfo.cloningOfPageTables); + EXPECT_EQ(DeviceBitfield{1llu << subDeviceIndex}, storageInfo.memoryBanks); + EXPECT_EQ(DeviceBitfield{1llu << subDeviceIndex}, storageInfo.pageTablesVisibility); + EXPECT_EQ(engine.osContext->getDeviceBitfield().to_ulong(), storageInfo.memoryBanks.to_ulong()); + EXPECT_FALSE(storageInfo.tileInstanced); + + if (!debugger->singleAddressSpaceSbaTracking) { + EXPECT_EQ(debugger->sbaTrackingGpuVa.address, debugger->perContextSbaAllocations[contextId]->getGpuAddress()); + } else { + EXPECT_NE(debugger->sbaTrackingGpuVa.address, debugger->perContextSbaAllocations[contextId]->getGpuAddress()); + } + } + subDeviceIndex++; + } +} + +using NotGen8Or11 = AreNotGfxCores; + +HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledAndRequiredGsbaWhenCommandListIsExecutedThenProgramGsbaWritesToSbaTrackingBuffer, NotGen8Or11) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + auto cmdQ = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue); + ASSERT_NE(nullptr, cmdQ); + + auto commandQueue = whitebox_cast(cmdQ); + auto cmdQHw = static_cast *>(cmdQ); + + if (cmdQHw->estimateStateBaseAddressCmdSize() == 0) { + commandQueue->destroy(); + GTEST_SKIP(); + } + + auto usedSpaceBefore = commandQueue->commandStream->getUsed(); + + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; + CommandList::fromHandle(commandLists[0])->setCommandListPerThreadScratchSize(4096); + + uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandQueue->commandStream->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, ptrOffset(commandQueue->commandStream->getCpuBase(), 0), usedSpaceAfter)); + + auto sbaItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sbaItor); + auto cmdSba = genCmdCast(*sbaItor); + + auto sdiItor = find(sbaItor, cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); + auto cmdSdi = genCmdCast(*sdiItor); + + uint64_t gsbaGpuVa = cmdSba->getGeneralStateBaseAddress(); + EXPECT_EQ(static_cast(gsbaGpuVa & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(gsbaGpuVa >> 32), cmdSdi->getDataDword1()); + + auto expectedGpuVa = GmmHelper::decanonize(device->getL0Debugger()->getSbaTrackingGpuVa()) + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + + for (auto i = 0u; i < numCommandLists; i++) { + auto commandList = CommandList::fromHandle(commandLists[i]); + commandList->destroy(); + } + commandQueue->destroy(); +} + +HWTEST_F(L0DebuggerTest, givenDebuggingEnabledAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesArePrinted) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.DebuggerLogBitmask.set(255); + + testing::internal::CaptureStdout(); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue->commandStream); + + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; + const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + commandQueue->synchronize(0); + + std::string output = testing::internal::GetCapturedStdout(); + size_t pos = output.find("INFO: Debugger: SBA stored ssh"); + EXPECT_NE(std::string::npos, pos); + + pos = output.find("Debugger: SBA ssh"); + EXPECT_NE(std::string::npos, pos); + + auto commandList = CommandList::fromHandle(commandLists[0]); + commandList->destroy(); + + commandQueue->destroy(); +} + +using L0DebuggerSimpleTest = Test; + +HWTEST_F(L0DebuggerSimpleTest, givenNullL0DebuggerAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesAreNotPrinted) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.DebuggerLogBitmask.set(255); + + EXPECT_EQ(nullptr, device->getL0Debugger()); + testing::internal::CaptureStdout(); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue->commandStream); + + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; + const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + commandQueue->synchronize(0); + + std::string output = testing::internal::GetCapturedStdout(); + size_t pos = output.find("Debugger: SBA"); + EXPECT_EQ(std::string::npos, pos); + + auto commandList = CommandList::fromHandle(commandLists[0]); + commandList->destroy(); + + commandQueue->destroy(); +} + +HWTEST_F(L0DebuggerTest, givenL0DebuggerAndDebuggerLogsDisabledWhenCommandQueueIsSynchronizedThenSbaAddressesAreNotPrinted) { + DebugManagerStateRestore restorer; + NEO::DebugManager.flags.DebuggerLogBitmask.set(0); + + EXPECT_NE(nullptr, device->getL0Debugger()); + testing::internal::CaptureStdout(); + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + auto commandQueue = whitebox_cast(CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, returnValue)); + ASSERT_NE(nullptr, commandQueue->commandStream); + + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; + const uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + commandQueue->synchronize(0); + + std::string output = testing::internal::GetCapturedStdout(); + size_t pos = output.find("Debugger: SBA"); + EXPECT_EQ(std::string::npos, pos); + + auto commandList = CommandList::fromHandle(commandLists[0]); + commandList->destroy(); + + commandQueue->destroy(); +} + +HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenNonCopyCommandListIsInititalizedOrResetThenSSHAddressIsTracked, NotGen8Or11) { + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + + size_t usedSpaceBefore = 0; + ze_result_t returnValue; + ze_command_list_handle_t commandListHandle = CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle(); + auto commandList = CommandList::fromHandle(commandListHandle); + + auto usedSpaceAfter = commandList->commandContainer.getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer( + cmdList, commandList->commandContainer.getCommandStream()->getCpuBase(), usedSpaceAfter)); + + auto sbaItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sbaItor); + auto cmdSba = genCmdCast(*sbaItor); + + uint64_t sshGpuVa = cmdSba->getSurfaceStateBaseAddress(); + auto expectedGpuVa = commandList->commandContainer.getIndirectHeap(NEO::HeapType::SURFACE_STATE)->getHeapGpuBase(); + EXPECT_EQ(expectedGpuVa, sshGpuVa); + EXPECT_EQ(1u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); + + commandList->reset(); + EXPECT_EQ(2u, getMockDebuggerL0Hw()->captureStateBaseAddressCount); + + commandList->destroy(); +} + +HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledWhenCommandListIsExecutedThenSbaBufferIsPushedToResidencyContainer, IsAtLeastSkl) { + ze_command_queue_desc_t queueDesc = {}; + + std::unique_ptr, Deleter> commandQueue(new MockCommandQueueHw(device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc)); + commandQueue->initialize(false, false); + + ze_result_t returnValue; + ze_command_list_handle_t commandLists[] = { + CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)->toHandle()}; + uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + auto result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + auto sbaBuffer = device->getL0Debugger()->getSbaTrackingBuffer(neoDevice->getDefaultEngine().commandStreamReceiver->getOsContext().getContextId()); + bool sbaFound = false; + + for (auto iter : commandQueue->residencyContainerSnapshot) { + if (iter == sbaBuffer) { + sbaFound = true; + } + } + EXPECT_TRUE(sbaFound); + + auto commandList = CommandList::fromHandle(commandLists[0]); + commandList->destroy(); +} + +HWTEST_F(L0DebuggerSimpleTest, givenNonZeroGpuVasWhenProgrammingSbaTrackingThenCorrectCmdsAreAddedToStream) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + auto debugger = std::make_unique>(neoDevice); + + debugger->sbaTrackingGpuVa.address = 0x45670000; + auto expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); + + StackVec buffer(4096); + NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); + uint64_t gsba = 0x60000; + uint64_t ssba = 0x1234567000; + uint64_t iba = 0xfff80000; + uint64_t ioba = 0x8100000; + uint64_t dsba = 0xffff0000aaaa0000; + + NEO::Debugger::SbaAddresses sbaAddresses = {}; + sbaAddresses.GeneralStateBaseAddress = gsba; + sbaAddresses.SurfaceStateBaseAddress = ssba; + sbaAddresses.InstructionBaseAddress = iba; + sbaAddresses.IndirectObjectBaseAddress = ioba; + sbaAddresses.DynamicStateBaseAddress = dsba; + sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; + + debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); + + EXPECT_EQ(6 * sizeof(MI_STORE_DATA_IMM), cmdStream.getUsed()); + + auto sdiItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); + auto cmdSdi = genCmdCast(*sdiItor); + + EXPECT_EQ(static_cast(gsba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(gsba >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress); + EXPECT_EQ(static_cast(ssba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(ssba >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress); + + auto decanonizedAddress = GmmHelper::decanonize(dsba); + EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress); + EXPECT_EQ(static_cast(ioba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(ioba >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, InstructionBaseAddress); + EXPECT_EQ(static_cast(iba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(iba >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress); + EXPECT_EQ(static_cast(ssba & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(ssba >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); +} + +HWTEST_F(L0DebuggerSimpleTest, givenCanonizedGpuVasWhenProgrammingSbaTrackingThenNonCanonicalAddressesAreStored) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + auto debugger = std::make_unique>(neoDevice); + + debugger->sbaTrackingGpuVa.address = 0x45670000; + auto expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, GeneralStateBaseAddress); + + StackVec buffer(4096); + NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); + uint64_t gsba = 0xffff800000060000; + uint64_t ssba = 0xffff801234567000; + uint64_t iba = 0xffff8000fff80000; + uint64_t ioba = 0xffff800008100000; + uint64_t dsba = 0xffff8000aaaa0000; + + NEO::Debugger::SbaAddresses sbaAddresses = {}; + sbaAddresses.GeneralStateBaseAddress = gsba; + sbaAddresses.SurfaceStateBaseAddress = ssba; + sbaAddresses.InstructionBaseAddress = iba; + sbaAddresses.IndirectObjectBaseAddress = ioba; + sbaAddresses.DynamicStateBaseAddress = dsba; + sbaAddresses.BindlessSurfaceStateBaseAddress = ssba; + + debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::PARSE::parseCommandBuffer(cmdList, cmdStream.getCpuBase(), cmdStream.getUsed())); + + EXPECT_EQ(6 * sizeof(MI_STORE_DATA_IMM), cmdStream.getUsed()); + + auto sdiItor = find(cmdList.begin(), cmdList.end()); + ASSERT_NE(cmdList.end(), sdiItor); + auto cmdSdi = genCmdCast(*sdiItor); + + auto decanonizedAddress = GmmHelper::decanonize(gsba); + EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, SurfaceStateBaseAddress); + decanonizedAddress = GmmHelper::decanonize(ssba); + EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, DynamicStateBaseAddress); + decanonizedAddress = GmmHelper::decanonize(dsba); + EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, IndirectObjectBaseAddress); + decanonizedAddress = GmmHelper::decanonize(ioba); + EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, InstructionBaseAddress); + decanonizedAddress = GmmHelper::decanonize(iba); + EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); + + sdiItor++; + cmdSdi = genCmdCast(*sdiItor); + + expectedGpuVa = debugger->sbaTrackingGpuVa.address + offsetof(SbaTrackedAddresses, BindlessSurfaceStateBaseAddress); + decanonizedAddress = GmmHelper::decanonize(ssba); + EXPECT_EQ(static_cast(decanonizedAddress & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(decanonizedAddress >> 32), cmdSdi->getDataDword1()); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + EXPECT_TRUE(cmdSdi->getStoreQword()); +} + +HWTEST_F(L0DebuggerSimpleTest, givenZeroGpuVasWhenProgrammingSbaTrackingThenStreamIsNotUsed) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + auto debugger = std::make_unique>(neoDevice); + + debugger->sbaTrackingGpuVa.address = 0x45670000; + + StackVec buffer(4096); + NEO::LinearStream cmdStream(buffer.begin(), buffer.size()); + uint64_t gsba = 0; + uint64_t ssba = 0; + + NEO::Debugger::SbaAddresses sbaAddresses = {}; + sbaAddresses.GeneralStateBaseAddress = gsba; + sbaAddresses.SurfaceStateBaseAddress = ssba; + + debugger->programSbaTrackingCommands(cmdStream, sbaAddresses); + + EXPECT_EQ(0u, cmdStream.getUsed()); +} + +HWTEST_F(L0DebuggerSimpleTest, givenNotChangedSurfaceStateWhenCapturingSBAThenNoTrackingCmdsAreAdded) { + auto debugger = std::make_unique>(neoDevice); + + debugger->sbaTrackingGpuVa.address = 0x45670000; + + NEO::CommandContainer container; + container.initialize(neoDevice, nullptr, true); + + NEO::Debugger::SbaAddresses sba = {}; + sba.SurfaceStateBaseAddress = 0x123456000; + + debugger->captureStateBaseAddress(container, sba); + auto sizeUsed = container.getCommandStream()->getUsed(); + + EXPECT_NE(0u, sizeUsed); + sba.SurfaceStateBaseAddress = 0; + + debugger->captureStateBaseAddress(container, sba); + auto sizeUsed2 = container.getCommandStream()->getUsed(); + + EXPECT_EQ(sizeUsed, sizeUsed2); +} + +HWTEST_F(L0DebuggerSimpleTest, givenChangedBaseAddressesWhenCapturingSBAThenNoTrackingCmdsAreAdded) { + auto debugger = std::make_unique>(neoDevice); + + debugger->sbaTrackingGpuVa.address = 0x45670000; + { + NEO::CommandContainer container; + container.initialize(neoDevice, nullptr, true); + + NEO::Debugger::SbaAddresses sba = {}; + sba.SurfaceStateBaseAddress = 0x123456000; + + debugger->captureStateBaseAddress(container, sba); + auto sizeUsed = container.getCommandStream()->getUsed(); + + EXPECT_NE(0u, sizeUsed); + } + + { + NEO::CommandContainer container; + container.initialize(neoDevice, nullptr, true); + + NEO::Debugger::SbaAddresses sba = {}; + sba.GeneralStateBaseAddress = 0x123456000; + + debugger->captureStateBaseAddress(container, sba); + auto sizeUsed = container.getCommandStream()->getUsed(); + + EXPECT_NE(0u, sizeUsed); + } + + { + NEO::CommandContainer container; + container.initialize(neoDevice, nullptr, true); + + NEO::Debugger::SbaAddresses sba = {}; + sba.BindlessSurfaceStateBaseAddress = 0x123456000; + + debugger->captureStateBaseAddress(container, sba); + auto sizeUsed = container.getCommandStream()->getUsed(); + + EXPECT_NE(0u, sizeUsed); + } +} + +} // namespace ult +} // namespace L0 diff --git a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp index 59891d8672..35abbe5e2a 100644 --- a/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp +++ b/opencl/test/unit_test/os_interface/linux/drm_memory_manager_tests.cpp @@ -5943,4 +5943,92 @@ HWTEST_F(DrmMemoryManagerTest, givenCompletionFenceEnabledWhenHandlingCompletion memoryManager->freeGraphicsMemory(allocation); } +TEST_F(DrmMemoryManagerTest, givenMultiSubDevicesBitfieldWhenAllocatingSbaTrackingBufferThenCorrectMultiHostAllocationReturned) { + mock->ioctl_expected.total = -1; + + NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, MemoryConstants::pageSize, + NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, + false, false, + 0b0011}; + + const uint64_t gpuAddresses[] = {0, 0x12340000}; + + for (auto gpuAddress : gpuAddresses) { + properties.gpuAddress = gpuAddress; + + auto sbaBuffer = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties)); + + EXPECT_NE(nullptr, sbaBuffer); + + EXPECT_EQ(MemoryPool::System4KBPages, sbaBuffer->getMemoryPool()); + EXPECT_EQ(2u, sbaBuffer->getNumGmms()); + + EXPECT_NE(nullptr, sbaBuffer->getUnderlyingBuffer()); + EXPECT_EQ(MemoryConstants::pageSize, sbaBuffer->getUnderlyingBufferSize()); + + auto &bos = sbaBuffer->getBOs(); + + EXPECT_NE(nullptr, bos[0]); + EXPECT_NE(nullptr, bos[1]); + + if (gpuAddress != 0) { + EXPECT_EQ(gpuAddress, sbaBuffer->getGpuAddress()); + + EXPECT_EQ(gpuAddress, bos[0]->peekAddress()); + EXPECT_EQ(gpuAddress, bos[1]->peekAddress()); + EXPECT_EQ(0u, sbaBuffer->getReservedAddressPtr()); + } else { + EXPECT_EQ(bos[0]->peekAddress(), bos[1]->peekAddress()); + EXPECT_NE(nullptr, sbaBuffer->getReservedAddressPtr()); + EXPECT_NE(0u, sbaBuffer->getGpuAddress()); + } + + EXPECT_EQ(nullptr, bos[2]); + EXPECT_EQ(nullptr, bos[3]); + + memoryManager->freeGraphicsMemory(sbaBuffer); + } +} + +TEST_F(DrmMemoryManagerTest, givenSingleSubDevicesBitfieldWhenAllocatingSbaTrackingBufferThenSingleHostAllocationReturned) { + mock->ioctl_expected.total = -1; + + NEO::AllocationProperties properties{device->getRootDeviceIndex(), true, MemoryConstants::pageSize, + NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER, + false, false, + 0b0001}; + + const uint64_t gpuAddresses[] = {0, 0x12340000}; + + for (auto gpuAddress : gpuAddresses) { + properties.gpuAddress = gpuAddress; + + auto sbaBuffer = static_cast(memoryManager->allocateGraphicsMemoryWithProperties(properties)); + + EXPECT_NE(nullptr, sbaBuffer); + + EXPECT_EQ(MemoryPool::System4KBPages, sbaBuffer->getMemoryPool()); + EXPECT_EQ(1u, sbaBuffer->getNumGmms()); + + EXPECT_NE(nullptr, sbaBuffer->getUnderlyingBuffer()); + EXPECT_EQ(MemoryConstants::pageSize, sbaBuffer->getUnderlyingBufferSize()); + + auto &bos = sbaBuffer->getBOs(); + + EXPECT_NE(nullptr, bos[0]); + EXPECT_EQ(nullptr, bos[1]); + EXPECT_EQ(nullptr, bos[2]); + EXPECT_EQ(nullptr, bos[3]); + + if (gpuAddress != 0) { + EXPECT_EQ(gpuAddress, sbaBuffer->getGpuAddress()); + EXPECT_EQ(gpuAddress, bos[0]->peekAddress()); + } else { + EXPECT_NE(0u, sbaBuffer->getGpuAddress()); + } + + memoryManager->freeGraphicsMemory(sbaBuffer); + } +} + } // namespace NEO diff --git a/shared/source/memory_manager/definitions/storage_info.cpp b/shared/source/memory_manager/definitions/storage_info.cpp index d815966c14..55cb35030e 100644 --- a/shared/source/memory_manager/definitions/storage_info.cpp +++ b/shared/source/memory_manager/definitions/storage_info.cpp @@ -71,6 +71,7 @@ StorageInfo MemoryManager::createStorageInfoFromProperties(const AllocationPrope storageInfo.tileInstanced = true; break; case AllocationType::PRIVATE_SURFACE: + case AllocationType::DEBUG_SBA_TRACKING_BUFFER: storageInfo.cloningOfPageTables = false; if (properties.subDevicesBitfield.count() == 1) { diff --git a/shared/source/memory_manager/graphics_allocation.h b/shared/source/memory_manager/graphics_allocation.h index b299ad5a77..02fe659f5b 100644 --- a/shared/source/memory_manager/graphics_allocation.h +++ b/shared/source/memory_manager/graphics_allocation.h @@ -187,6 +187,7 @@ class GraphicsAllocation : public IDNode { allocationType == AllocationType::RING_BUFFER || allocationType == AllocationType::SEMAPHORE_BUFFER || allocationType == AllocationType::DEBUG_CONTEXT_SAVE_AREA || + allocationType == AllocationType::DEBUG_SBA_TRACKING_BUFFER || allocationType == AllocationType::GPU_TIMESTAMP_DEVICE_BUFFER || allocationType == AllocationType::DEBUG_MODULE_AREA; } diff --git a/shared/source/memory_manager/memory_manager.cpp b/shared/source/memory_manager/memory_manager.cpp index 700971bc05..508c4562f0 100644 --- a/shared/source/memory_manager/memory_manager.cpp +++ b/shared/source/memory_manager/memory_manager.cpp @@ -430,7 +430,8 @@ bool MemoryManager::getAllocationData(AllocationData &allocationData, const Allo allocationData.flags.multiOsContextCapable = properties.flags.multiOsContextCapable; allocationData.usmInitialPlacement = properties.usmInitialPlacement; - if (properties.allocationType == AllocationType::DEBUG_CONTEXT_SAVE_AREA) { + if (properties.allocationType == AllocationType::DEBUG_CONTEXT_SAVE_AREA || + properties.allocationType == AllocationType::DEBUG_SBA_TRACKING_BUFFER) { allocationData.flags.zeroMemory = 1; } @@ -700,7 +701,8 @@ bool MemoryManager::copyMemoryToAllocation(GraphicsAllocation *graphicsAllocatio for (auto i = 0u; i < graphicsAllocation->storageInfo.getNumBanks(); ++i) { memcpy_s(ptrOffset(static_cast(graphicsAllocation->getUnderlyingBuffer()) + i * graphicsAllocation->getUnderlyingBufferSize(), destinationOffset), (graphicsAllocation->getUnderlyingBufferSize() - destinationOffset), memoryToCopy, sizeToCopy); - if (graphicsAllocation->getAllocationType() != AllocationType::DEBUG_CONTEXT_SAVE_AREA) { + if (graphicsAllocation->getAllocationType() != AllocationType::DEBUG_CONTEXT_SAVE_AREA && + graphicsAllocation->getAllocationType() != AllocationType::DEBUG_SBA_TRACKING_BUFFER) { break; } } diff --git a/shared/source/memory_manager/os_agnostic_memory_manager.cpp b/shared/source/memory_manager/os_agnostic_memory_manager.cpp index f0f9851cb7..420a012f87 100644 --- a/shared/source/memory_manager/os_agnostic_memory_manager.cpp +++ b/shared/source/memory_manager/os_agnostic_memory_manager.cpp @@ -77,7 +77,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment sizeAligned = alignUp(allocationData.size, MemoryConstants::pageSize2Mb); } - if (allocationData.type == AllocationType::DEBUG_CONTEXT_SAVE_AREA) { + if (allocationData.type == AllocationType::DEBUG_CONTEXT_SAVE_AREA || + allocationData.type == AllocationType::DEBUG_SBA_TRACKING_BUFFER) { sizeAligned *= allocationData.storageInfo.getNumBanks(); } @@ -100,7 +101,8 @@ GraphicsAllocation *OsAgnosticMemoryManager::allocateGraphicsMemoryWithAlignment memoryAllocation->setCpuPtrAndGpuAddress(ptr, reinterpret_cast(gpuPtr)); } - if (allocationData.type == AllocationType::DEBUG_CONTEXT_SAVE_AREA) { + if (allocationData.type == AllocationType::DEBUG_CONTEXT_SAVE_AREA || + allocationData.type == AllocationType::DEBUG_SBA_TRACKING_BUFFER) { memoryAllocation->storageInfo = allocationData.storageInfo; } diff --git a/shared/source/os_interface/linux/drm_memory_manager.cpp b/shared/source/os_interface/linux/drm_memory_manager.cpp index cfe4f0fa08..595a7269f6 100644 --- a/shared/source/os_interface/linux/drm_memory_manager.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager.cpp @@ -277,7 +277,9 @@ DrmAllocation *DrmMemoryManager::createGraphicsAllocation(OsHandleStorage &handl } DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithAlignment(const AllocationData &allocationData) { - if (allocationData.type == NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA) { + if (allocationData.type == NEO::AllocationType::DEBUG_CONTEXT_SAVE_AREA || + (allocationData.type == NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER && + allocationData.storageInfo.subDeviceBitfield.count() > 1)) { return createMultiHostAllocation(allocationData); } @@ -412,6 +414,12 @@ DrmAllocation *DrmMemoryManager::allocateGraphicsMemoryWithHostPtr(const Allocat } GraphicsAllocation *DrmMemoryManager::allocateGraphicsMemoryWithGpuVa(const AllocationData &allocationData) { + + if (allocationData.type == NEO::AllocationType::DEBUG_SBA_TRACKING_BUFFER && + allocationData.storageInfo.subDeviceBitfield.count() > 1) { + return createMultiHostAllocation(allocationData); + } + auto osContextLinux = static_cast(allocationData.osContext); const size_t minAlignment = getUserptrAlignment(); diff --git a/shared/source/os_interface/linux/drm_memory_manager_create_multi_host_allocation.cpp b/shared/source/os_interface/linux/drm_memory_manager_create_multi_host_allocation.cpp index 4ea2f568f1..c2af2402c1 100644 --- a/shared/source/os_interface/linux/drm_memory_manager_create_multi_host_allocation.cpp +++ b/shared/source/os_interface/linux/drm_memory_manager_create_multi_host_allocation.cpp @@ -16,7 +16,6 @@ DrmAllocation *DrmMemoryManager::createMultiHostAllocation(const AllocationData if (!isAligned(allocationData.size)) { return nullptr; } - auto numTiles = allocationData.storageInfo.getNumBanks(); auto sizePerTile = allocationData.size; auto hostSizeToAllocate = numTiles * sizePerTile; @@ -28,16 +27,27 @@ DrmAllocation *DrmMemoryManager::createMultiHostAllocation(const AllocationData zeroCpuMemoryIfRequested(allocationData, cpuBasePointer, hostSizeToAllocate); - auto gpuAddress = acquireGpuRange(sizePerTile, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD); + auto gpuAddress = allocationData.gpuAddress; + bool addressReserved = false; + if (gpuAddress == 0) { + gpuAddress = acquireGpuRange(sizePerTile, allocationData.rootDeviceIndex, HeapIndex::HEAP_STANDARD); + addressReserved = true; + } else { + gpuAddress = allocationData.gpuAddress; + } + auto allocation = new DrmAllocation(allocationData.rootDeviceIndex, numTiles, allocationData.type, nullptr /*bo*/, cpuBasePointer, gpuAddress, sizePerTile, MemoryPool::System4KBPages); allocation->storageInfo = allocationData.storageInfo; allocation->setFlushL3Required(true); allocation->setUncacheable(true); - allocation->setReservedAddressRange(reinterpret_cast(gpuAddress), sizePerTile); allocation->setDriverAllocatedCpuPtr(cpuBasePointer); + if (addressReserved) { + allocation->setReservedAddressRange(reinterpret_cast(gpuAddress), sizePerTile); + } + for (auto tile = 0u, currentBank = 0u; tile < numTiles; ++tile, ++currentBank) { while (!allocationData.storageInfo.memoryBanks.test(currentBank)) { ++currentBank;