diff --git a/level_zero/core/test/aub_tests/debugger/debugger_aub_tests.cpp b/level_zero/core/test/aub_tests/debugger/debugger_aub_tests.cpp index 801c4d5994..4413ec6325 100644 --- a/level_zero/core/test/aub_tests/debugger/debugger_aub_tests.cpp +++ b/level_zero/core/test/aub_tests/debugger/debugger_aub_tests.cpp @@ -8,6 +8,7 @@ #include "shared/source/debugger/debugger_l0.h" #include "shared/source/gmm_helper/gmm_helper.h" #include "shared/source/helpers/array_count.h" +#include "shared/source/helpers/bindless_heaps_helper.h" #include "shared/source/helpers/file_io.h" #include "shared/source/helpers/register_offsets.h" #include "shared/source/indirect_heap/indirect_heap.h" @@ -149,5 +150,125 @@ HWTEST2_F(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExe driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst); } +struct DebuggerSingleAddressSpaceGlobalBindlessAllocatorAubFixture : public DebuggerAubFixture { + void setUp() { + NEO::debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); + NEO::debugManager.flags.UseBindlessMode.set(1); + DebuggerAubFixture::setUp(); + } + void tearDown() { + DebuggerAubFixture::tearDown(); + } +}; +using DebuggerGlobalAllocatorAub = Test; +using PlatformsSupportingGlobalBindless = IsWithinGfxCore; + +HWTEST2_F(DebuggerGlobalAllocatorAub, GivenKernelWithScratchWhenCmdListExecutedThenSbaAddressesAreTracked, PlatformsSupportingGlobalBindless) { + + const uint32_t arraySize = 32; + const uint32_t typeSize = sizeof(int); + + uint32_t bufferSize = (arraySize * 2 + 1) * typeSize - 4; + const uint32_t groupSize[] = {arraySize, 1, 1}; + const uint32_t groupCount[] = {1, 1, 1}; + + memoryManager = neoDevice->getMemoryManager(); + gmmHelper = neoDevice->getGmmHelper(); + rootDeviceIndex = neoDevice->getRootDeviceIndex(); + + NEO::debugManager.flags.UpdateCrossThreadDataSize.set(true); + + ASSERT_NE(nullptr, neoDevice->getBindlessHeapsHelper()); + + NEO::SVMAllocsManager::UnifiedMemoryProperties 
unifiedMemoryProperties(InternalMemoryType::hostUnifiedMemory, + 1, + context->rootDeviceIndices, + context->deviceBitfields); + + auto bufferDst = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties); + memset(bufferDst, 0, bufferSize); + auto bufferSrc = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(bufferSize, unifiedMemoryProperties); + memset(bufferSrc, 0, bufferSize); + auto bufferOffset = driverHandle->svmAllocsManager->createHostUnifiedMemoryAllocation(128 * arraySize, unifiedMemoryProperties); + memset(bufferOffset, 0, 128 * arraySize); + + int *srcBufferInt = static_cast(bufferSrc); + std::unique_ptr expectedMemoryInt = std::make_unique(bufferSize / typeSize); + const int expectedVal1 = 16256; + const int expectedVal2 = 512; + + for (uint32_t i = 0; i < arraySize; ++i) { + srcBufferInt[i] = 2; + expectedMemoryInt[i * 2] = expectedVal1; + expectedMemoryInt[i * 2 + 1] = expectedVal2; + } + + auto simulatedCsr = AUBFixtureL0::getSimulatedCsr(); + simulatedCsr->initializeEngine(); + + simulatedCsr->writeMemory(*driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()); + + ze_group_count_t dispatchTraits; + dispatchTraits.groupCountX = groupCount[0]; + dispatchTraits.groupCountY = groupCount[1]; + dispatchTraits.groupCountZ = groupCount[2]; + + module = static_cast(Module::fromHandle(createModuleFromFile("simple_spill_fill_kernel", context, device, "", true))); + + ze_kernel_handle_t kernel; + ze_kernel_desc_t kernelDesc = {ZE_STRUCTURE_TYPE_KERNEL_DESC}; + kernelDesc.pKernelName = "spill_test"; + + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelCreate(module->toHandle(), &kernelDesc, &kernel)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 0, sizeof(void *), &bufferSrc)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 1, sizeof(void *), &bufferDst)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetArgumentValue(kernel, 2, 
sizeof(void *), &bufferOffset)); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelSetGroupSize(kernel, groupSize[0], groupSize[1], groupSize[2])); + + ze_command_list_handle_t cmdListHandle = commandList->toHandle(); + EXPECT_EQ(ZE_RESULT_SUCCESS, zeCommandListAppendLaunchKernel(cmdListHandle, kernel, &dispatchTraits, nullptr, 0, nullptr)); + commandList->close(); + + pCmdq->executeCommandLists(1, &cmdListHandle, nullptr, false, nullptr); + pCmdq->synchronize(std::numeric_limits::max()); + + expectMemory(reinterpret_cast(driverHandle->svmAllocsManager->getSVMAlloc(bufferDst)->gpuAllocations.getDefaultGraphicsAllocation()->getGpuAddress()), + expectedMemoryInt.get(), bufferSize); + + const auto sbaAddress = device->getL0Debugger()->getSbaTrackingBuffer(csr->getOsContext().getContextId())->getGpuAddress(); + auto instructionHeapBaseAddress = memoryManager->getInternalHeapBaseAddress(rootDeviceIndex, + memoryManager->isLocalMemoryUsedForIsa(rootDeviceIndex)); + instructionHeapBaseAddress = gmmHelper->canonize(instructionHeapBaseAddress); + + expectMemory(reinterpret_cast(sbaAddress + offsetof(NEO::SbaTrackedAddresses, instructionBaseAddress)), + &instructionHeapBaseAddress, sizeof(instructionHeapBaseAddress)); + + auto commandListSurfaceHeapAllocation = commandList->commandContainer.getIndirectHeap(HeapType::surfaceState); + + auto surfaceStateBaseAddress = commandListSurfaceHeapAllocation->getGraphicsAllocation()->getGpuAddress(); + surfaceStateBaseAddress = gmmHelper->canonize(surfaceStateBaseAddress); + + expectMemory(reinterpret_cast(sbaAddress + offsetof(NEO::SbaTrackedAddresses, surfaceStateBaseAddress)), + &surfaceStateBaseAddress, sizeof(surfaceStateBaseAddress)); + + auto bindlessSurfaceStateBaseAddress = neoDevice->getBindlessHeapsHelper()->getGlobalHeapsBase(); + expectMemory(reinterpret_cast(sbaAddress + offsetof(NEO::SbaTrackedAddresses, bindlessSurfaceStateBaseAddress)), + &bindlessSurfaceStateBaseAddress, sizeof(bindlessSurfaceStateBaseAddress)); + + auto 
commandListDynamicHeapAllocation = commandList->commandContainer.getIndirectHeap(HeapType::dynamicState); + if (commandListDynamicHeapAllocation) { + auto dynamicStateBaseAddress = commandListDynamicHeapAllocation->getGraphicsAllocation()->getGpuAddress(); + dynamicStateBaseAddress = gmmHelper->canonize(dynamicStateBaseAddress); + + expectMemory(reinterpret_cast(sbaAddress + offsetof(NEO::SbaTrackedAddresses, dynamicStateBaseAddress)), + &bindlessSurfaceStateBaseAddress, sizeof(bindlessSurfaceStateBaseAddress)); + } + + EXPECT_EQ(ZE_RESULT_SUCCESS, zeKernelDestroy(kernel)); + driverHandle->svmAllocsManager->freeSVMAlloc(bufferDst); + driverHandle->svmAllocsManager->freeSVMAlloc(bufferSrc); + driverHandle->svmAllocsManager->freeSVMAlloc(bufferOffset); +} + } // namespace ult } // namespace L0 diff --git a/level_zero/core/test/aub_tests/fixtures/aub_fixture.cpp b/level_zero/core/test/aub_tests/fixtures/aub_fixture.cpp index 43b87c5ff0..a385914e4e 100644 --- a/level_zero/core/test/aub_tests/fixtures/aub_fixture.cpp +++ b/level_zero/core/test/aub_tests/fixtures/aub_fixture.cpp @@ -105,10 +105,14 @@ void AUBFixtureL0::tearDown() { pCmdq->destroy(); } -ze_module_handle_t AUBFixtureL0::createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags) { +ze_module_handle_t AUBFixtureL0::createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags, bool useSharedFile) { ze_module_handle_t moduleHandle; std::string testFile; - retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin"); + if (useSharedFile) { + retrieveBinaryKernelFilename(testFile, fileName + "_", ".bin"); + } else { + retrieveBinaryKernelFilenameApiSpecific(testFile, fileName + "_", ".bin"); + } size_t size = 0; auto src = loadDataFromFile(testFile.c_str(), size); @@ -129,4 +133,5 @@ ze_module_handle_t AUBFixtureL0::createModuleFromFile(const 
std::string &fileNam EXPECT_EQ(ZE_RESULT_SUCCESS, zeModuleCreate(context, device, &moduleDesc, &moduleHandle, nullptr)); return moduleHandle; } + } // namespace L0 diff --git a/level_zero/core/test/aub_tests/fixtures/aub_fixture.h b/level_zero/core/test/aub_tests/fixtures/aub_fixture.h index 53e2bfc9d4..c0acbe1518 100644 --- a/level_zero/core/test/aub_tests/fixtures/aub_fixture.h +++ b/level_zero/core/test/aub_tests/fixtures/aub_fixture.h @@ -101,7 +101,7 @@ class AUBFixtureL0 { } } - static ze_module_handle_t createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags); + static ze_module_handle_t createModuleFromFile(const std::string &fileName, ze_context_handle_t context, ze_device_handle_t device, const std::string &buildFlags, bool useSharedFile = false); std::string aubFileName; std::unique_ptr> backupUltConfig; diff --git a/level_zero/core/test/aub_tests/xe_hpg/test_excludes_xe_hpg.cpp b/level_zero/core/test/aub_tests/xe_hpg/test_excludes_xe_hpg.cpp index 7690081e57..bcb644f565 100644 --- a/level_zero/core/test/aub_tests/xe_hpg/test_excludes_xe_hpg.cpp +++ b/level_zero/core/test/aub_tests/xe_hpg/test_excludes_xe_hpg.cpp @@ -10,5 +10,7 @@ namespace L0 { namespace ult { HWTEST_EXCLUDE_PRODUCT(DebuggerSingleAddressSpaceAub, GivenSingleAddressSpaceWhenCmdListIsExecutedThenSbaAddressesAreTracked_PlatformsSupportingSingleAddressSpace, IGFX_XE_HPG_CORE); -} + +HWTEST_EXCLUDE_PRODUCT(DebuggerGlobalAllocatorAub, GivenKernelWithScratchWhenCmdListExecutedThenSbaAddressesAreTracked_PlatformsSupportingGlobalBindless, IGFX_XE_HPG_CORE); +} // namespace ult } // namespace L0 diff --git a/level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h b/level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h index 9247f91516..356df1f956 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h +++ 
b/level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h @@ -111,6 +111,19 @@ struct L0DebuggerPerContextAddressSpaceFixture : public L0DebuggerHwFixture { DebugManagerStateRestore restorer; }; +struct L0DebuggerPerContextAddressSpaceGlobalBindlessFixture : public L0DebuggerHwFixture { + void setUp() { + NEO::debugManager.flags.DebuggerForceSbaTrackingMode.set(0); + NEO::debugManager.flags.UseBindlessMode.set(1); + NEO::debugManager.flags.UseExternalAllocatorForSshAndDsh.set(1); + L0DebuggerHwFixture::setUp(); + } + void tearDown() { + L0DebuggerHwFixture::tearDown(); + } + DebugManagerStateRestore restorer; +}; + struct L0DebuggerSingleAddressSpaceFixture : public L0DebuggerHwFixture { void setUp() { NEO::debugManager.flags.DebuggerForceSbaTrackingMode.set(1); diff --git a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp index 8d2a6c8d8f..b7909a0ca1 100644 --- a/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp +++ b/level_zero/core/test/unit_tests/sources/debugger/test_l0_debugger_sba_tracking.cpp @@ -17,6 +17,8 @@ #include "level_zero/core/source/cmdlist/cmdlist.h" #include "level_zero/core/test/unit_tests/fixtures/device_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" +#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" +#include "level_zero/core/test/unit_tests/mocks/mock_module.h" #include "level_zero/core/test/unit_tests/sources/debugger/l0_debugger_fixture.h" namespace L0 { @@ -179,6 +181,90 @@ HWTEST2_F(L0DebuggerPerContextAddressSpaceTest, givenDebuggingEnabledAndRequired commandQueue->destroy(); } +using L0DebuggerPerContextAddressSpaceGlobalBindlessTest = Test; +using PlatformsSupportingGlobalBindless = IsWithinGfxCore; + +HWTEST2_F(L0DebuggerPerContextAddressSpaceGlobalBindlessTest, 
givenDebuggingEnabledAndRequiredSshWhenCommandListIsExecutedThenProgramSsbaWritesToSbaTrackingBuffer, PlatformsSupportingGlobalBindless) { + using MI_STORE_DATA_IMM = typename FamilyType::MI_STORE_DATA_IMM; + using STATE_BASE_ADDRESS = typename FamilyType::STATE_BASE_ADDRESS; + + ze_command_queue_desc_t queueDesc = {}; + ze_result_t returnValue; + auto cmdQ = CommandQueue::create(productFamily, device, neoDevice->getDefaultEngine().commandStreamReceiver, &queueDesc, false, false, false, returnValue); + ASSERT_NE(nullptr, cmdQ); + + auto commandQueue = whiteboxCast(cmdQ); + auto usedSpaceBefore = commandQueue->commandStream.getUsed(); + + auto commandList = CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false); + ze_command_list_handle_t commandLists[] = {commandList->toHandle()}; + + Mock module(device, nullptr, ModuleType::user); + Mock kernel; + kernel.module = &module; + ze_group_count_t groupCount{1, 1, 1}; + + kernel.descriptor.kernelAttributes.perThreadScratchSize[0] = 0x40; + + CmdListKernelLaunchParams launchParams = {}; + auto result = commandList->appendLaunchKernel(kernel.toHandle(), groupCount, nullptr, 0, nullptr, launchParams, false); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + CommandList::fromHandle(commandLists[0])->close(); + + uint32_t numCommandLists = sizeof(commandLists) / sizeof(commandLists[0]); + + result = commandQueue->executeCommandLists(numCommandLists, commandLists, nullptr, true, nullptr); + ASSERT_EQ(ZE_RESULT_SUCCESS, result); + + auto usedSpaceAfter = commandList->getCmdContainer().getCommandStream()->getUsed(); + ASSERT_GT(usedSpaceAfter, usedSpaceBefore); + + GenCmdList cmdList; + ASSERT_TRUE(FamilyType::Parse::parseCommandBuffer( + cmdList, ptrOffset(commandList->getCmdContainer().getCommandStream()->getCpuBase(), 0), usedSpaceAfter)); + + auto sbaItors = findAll(cmdList.begin(), cmdList.end()); + ASSERT_NE(0u, sbaItors.size()); + + auto sbaItor = sbaItors[sbaItors.size() - 1]; + + 
ASSERT_NE(cmdList.end(), sbaItor); + auto cmdSba = genCmdCast(*sbaItor); + + auto sdiItors = findAll(sbaItor, cmdList.end()); + ASSERT_NE(0u, sdiItors.size()); + + auto cmdSdi = genCmdCast(*sdiItors[0]); + + auto gmmHelper = neoDevice->getGmmHelper(); + auto expectedSshGpuVa = commandList->getCmdContainer().getIndirectHeap(HeapType::surfaceState)->getGpuBase(); + + for (size_t i = 0; i < sdiItors.size(); i++) { + cmdSdi = genCmdCast(*sdiItors[i]); + uint64_t address = cmdSdi->getDataDword1(); + address <<= 32; + address = address | cmdSdi->getDataDword0(); + if (expectedSshGpuVa == address) { + break; + } + cmdSdi = nullptr; + } + + ASSERT_NE(nullptr, cmdSdi); + uint64_t ssbaGpuVa = gmmHelper->canonize(cmdSba->getSurfaceStateBaseAddress()); + EXPECT_EQ(static_cast(ssbaGpuVa & 0x0000FFFFFFFFULL), cmdSdi->getDataDword0()); + EXPECT_EQ(static_cast(ssbaGpuVa >> 32), cmdSdi->getDataDword1()); + + auto expectedGpuVa = gmmHelper->decanonize(device->getL0Debugger()->getSbaTrackingGpuVa()) + offsetof(NEO::SbaTrackedAddresses, surfaceStateBaseAddress); + EXPECT_EQ(expectedGpuVa, cmdSdi->getAddress()); + + for (auto i = 0u; i < numCommandLists; i++) { + auto commandList = CommandList::fromHandle(commandLists[i]); + commandList->destroy(); + } + commandQueue->destroy(); +} + HWTEST2_F(L0DebuggerTest, givenDebuggingEnabledAndDebuggerLogsWhenCommandQueueIsSynchronizedThenSbaAddressesArePrinted, Gen12Plus) { auto &compilerProductHelper = neoDevice->getCompilerProductHelper();
diff --git a/level_zero/tools/source/debug/debug_session_imp.cpp b/level_zero/tools/source/debug/debug_session_imp.cpp index 243f5ddea3..47963c830d 100644 --- a/level_zero/tools/source/debug/debug_session_imp.cpp +++ b/level_zero/tools/source/debug/debug_session_imp.cpp @@ -1315,6 +1315,18 @@ ze_result_t DebugSessionImp::readSbaRegisters(EuThread::ThreadId threadId, uint3 packed.push_back(bindingTableBaseAddress); packed.push_back(scratchSpaceBaseAddress); + /* Log each base address value in hex; PRIx64 is the output (printf-family) conversion macro, SCNx64 is input-only. */ + PRINT_DEBUGGER_INFO_LOG("Debug session : SBA ssh = %" PRIx64 + " gsba = %" PRIx64 + " dsba = %" PRIx64 + " ioba = %" PRIx64 + " iba = %" PRIx64 + " bsurfsba = %" PRIx64 + " btba = %" PRIx64 + " scrsba = %" PRIx64 "\n", + sbaBuffer.surfaceStateBaseAddress, sbaBuffer.generalStateBaseAddress, sbaBuffer.dynamicStateBaseAddress, + sbaBuffer.indirectObjectBaseAddress, sbaBuffer.instructionBaseAddress, sbaBuffer.bindlessSurfaceStateBaseAddress, bindingTableBaseAddress, scratchSpaceBaseAddress); + size_t size = count * sbaRegDesc->bytes; memcpy_s(pRegisterValues, size, &packed[start], size); diff --git a/shared/source/command_container/command_encoder_xehp_and_later.inl b/shared/source/command_container/command_encoder_xehp_and_later.inl index d2af0df363..8c3a656b13 100644 --- a/shared/source/command_container/command_encoder_xehp_and_later.inl +++ b/shared/source/command_container/command_encoder_xehp_and_later.inl @@ -323,6 +323,12 @@ void EncodeDispatchKernel::encode(CommandContainer &container, EncodeDis }; EncodeStateBaseAddress::encode(encodeStateBaseAddressArgs); container.setDirtyStateForAllHeaps(false); + + bool sbaTrackingEnabled = NEO::Debugger::isDebugEnabled(args.isInternal) && args.device->getL0Debugger(); + NEO::EncodeStateBaseAddress::setSbaTrackingForL0DebuggerIfEnabled(sbaTrackingEnabled, + *args.device, + *container.getCommandStream(), + sbaCmd, container.isUsingPrimaryBuffer()); } }