// Patch summary (explanatory preamble placed before the first `diff --git` header).
//
// Purpose: remember, on an Event, the Kernel whose launch the event was passed to when
// that kernel has a printf buffer allocation, and print that kernel's printf output when
// the host successfully synchronizes on the event.
//
// Changes by file, as visible in the hunks below:
//   * event.h: forward-declares `Kernel`; adds the member `Kernel *kernelWithPrintf`
//     (initialised to nullptr) plus the accessors `setKernelForPrintf()` and
//     `getKernelForPrintf()`.
//   * cmdlist_hw.inl (appendLaunchKernelIndirect): when `hEvent` is non-null and the
//     kernel's `getPrintfBufferAllocation()` is non-null, stores the kernel on the event.
//     `Kernel::fromHandle(kernelHandle)` is evaluated twice in the added lines.
//   * cmdlist_hw_base.inl (appendLaunchKernelWithParams): performs the same store
//     immediately before `return ZE_RESULT_SUCCESS`, guarded by `event != nullptr`.
//   * cmdlist_hw_xehp_and_later.inl (appendLaunchKernelWithParams): performs the same
//     store as the first statement inside the existing `if (event)` branch.
//   * event_impl.inl (EventImp::hostSynchronize): when `queryStatus()` returns
//     ZE_RESULT_SUCCESS and a kernel is stored, calls `printPrintfOutput(true)` on it and
//     then resets the stored pointer to nullptr before returning.
//   * test_cmdlist_append_launch_kernel_3.cpp: four HWTEST2_F cases checking that the
//     kernel is (or is not) stored on the event for appendLaunchKernelIndirect and
//     appendLaunchKernelWithParams, depending on whether `createPrintfBuffer()` was called.
//   * test_event.cpp: two TEST_F cases checking `printPrintfOutputCalledTimes` after
//     `hostSynchronize()` with a stored kernel (expects 1u) and with nullptr (expects 0u).
//
// NOTE(review): this copy of the patch looks damaged by text extraction: line breaks are
// collapsed and template argument lists appear to be missing (e.g. `static_cast(` with no
// target type, `std::unique_ptr commandList`, `Mock module`, `std::make_unique>>()`, and
// `#include #include` with no header names). It should be regenerated from the original
// commit before being applied or compiled.
// NOTE(review): the event keeps a raw `Kernel *`; within this diff the pointer is cleared
// only on the success path of hostSynchronize. Confirm the kernel is guaranteed to outlive
// the event until then, and whether other event paths not shown here (reset, destroy,
// plain status query) should also clear it or flush the printf output.
// NOTE(review): the two new TEST_F names mention "QueryStatus" but the bodies exercise
// hostSynchronize; the null-kernel case also builds mockModule/mockKernel that are never
// attached to the event.
diff --git a/level_zero/core/source/cmdlist/cmdlist_hw.inl b/level_zero/core/source/cmdlist/cmdlist_hw.inl index fe499cb7fa..6483b3b731 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw.inl @@ -361,6 +361,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelIndirect(ze_ Event *event = nullptr; if (hEvent) { event = Event::fromHandle(hEvent); + if (Kernel::fromHandle(kernelHandle)->getPrintfBufferAllocation() != nullptr) { + event->setKernelForPrintf(Kernel::fromHandle(kernelHandle)); + } launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl index 192e39e6cc..08568fba3d 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_base.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_base.inl @@ -228,6 +228,10 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K additionalCommands.pop_front(); } + if (event != nullptr && kernel->getPrintfBufferAllocation() != nullptr) { + event->setKernelForPrintf(kernel); + } + return ZE_RESULT_SUCCESS; } diff --git a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl index e68ffa6684..ef2467b4f8 100644 --- a/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl +++ b/level_zero/core/source/cmdlist/cmdlist_hw_xehp_and_later.inl @@ -191,6 +191,9 @@ ze_result_t CommandListCoreFamily::appendLaunchKernelWithParams(K bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent; Event *compactEvent = nullptr; if (event) { + if (kernel->getPrintfBufferAllocation() != nullptr) { + event->setKernelForPrintf(kernel); + } isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST); if (compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) { compactEvent = event; diff --git a/level_zero/core/source/event/event.h 
b/level_zero/core/source/event/event.h index cb7a6797b3..8f216e4494 100644 --- a/level_zero/core/source/event/event.h +++ b/level_zero/core/source/event/event.h @@ -38,6 +38,7 @@ struct Context; struct DriverHandle; struct DriverHandleImp; struct Device; +struct Kernel; #pragma pack(1) struct IpcEventPoolData { @@ -178,6 +179,12 @@ struct Event : _ze_event_handle_t { uint32_t getMaxKernelCount() const { return maxKernelCount; } + void setKernelForPrintf(Kernel *inputKernelPtr) { + kernelWithPrintf = inputKernelPtr; + } + Kernel *getKernelForPrintf() { + return kernelWithPrintf; + } bool isSignalScope() const { return !!signalScope; @@ -221,6 +228,7 @@ struct Event : _ze_event_handle_t { void *hostAddress = nullptr; Device *device = nullptr; EventPool *eventPool = nullptr; + Kernel *kernelWithPrintf = nullptr; uint32_t maxKernelCount = 0; uint32_t kernelCount = 1u; diff --git a/level_zero/core/source/event/event_impl.inl b/level_zero/core/source/event/event_impl.inl index 97c583bfd0..a5df0a1faa 100644 --- a/level_zero/core/source/event/event_impl.inl +++ b/level_zero/core/source/event/event_impl.inl @@ -13,6 +13,7 @@ #include "level_zero/core/source/event/event_imp.h" #include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h" +#include "level_zero/core/source/kernel/kernel.h" #include "level_zero/tools/source/metrics/metric.h" namespace L0 { @@ -308,6 +309,10 @@ ze_result_t EventImp::hostSynchronize(uint64_t timeout) { while (true) { ret = queryStatus(); if (ret == ZE_RESULT_SUCCESS) { + if (this->getKernelForPrintf() != nullptr) { + static_cast(this->getKernelForPrintf())->printPrintfOutput(true); + this->setKernelForPrintf(nullptr); + } return ret; } diff --git a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp index 5b98e66380..6614537194 100644 --- 
a/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp +++ b/level_zero/core/test/unit_tests/sources/cmdlist/test_cmdlist_append_launch_kernel_3.cpp @@ -26,6 +26,7 @@ #include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h" +#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" #include "level_zero/core/test/unit_tests/mocks/mock_module.h" namespace L0 { @@ -250,6 +251,126 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result); } +HWTEST2_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhenAppendingLaunchKernelIndirectThenKernelIsStoredOnEvent, IsAtLeastSkl) { + Mock module(this->device, nullptr); + Mock<::L0::Kernel> kernel; + + ze_result_t returnValue; + std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + kernel.module = &module; + kernel.descriptor.kernelAttributes.flags.usesPrintf = true; + kernel.createPrintfBuffer(); + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ASSERT_NE(nullptr, event->getKernelForPrintf()); +} + +HWTEST2_F(CommandListAppendLaunchKernel, 
givenEmptyAllocPrintfBufferKernelWhenAppendingLaunchKernelIndirectThenKernelIsNotStoredOnEvent, IsAtLeastSkl) { + Mock module(this->device, nullptr); + Mock<::L0::Kernel> kernel; + + ze_result_t returnValue; + std::unique_ptr commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue)); + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + kernel.module = &module; + kernel.descriptor.kernelAttributes.flags.usesPrintf = false; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, false); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ASSERT_EQ(nullptr, event->getKernelForPrintf()); +} + +HWTEST2_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhenAppendingLaunchKernelWithParamThenKernelIsStoredOnEvent, IsAtLeastSkl) { + Mock module(this->device, nullptr); + Mock<::L0::Kernel> kernel; + + ze_result_t returnValue; + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + kernel.module = &module; + kernel.descriptor.kernelAttributes.flags.usesPrintf = true; + kernel.createPrintfBuffer(); + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + + CmdListKernelLaunchParams launchParams = {}; + launchParams.isCooperative = false; + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + 
+ auto pCommandList = std::make_unique>>(); + pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + + auto result = pCommandList->appendLaunchKernelWithParams(&kernel, &groupCount, event.get(), launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ASSERT_NE(nullptr, event->getKernelForPrintf()); +} + +HWTEST2_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenAppendingLaunchKernelWithParamThenKernelIsNotStoredOnEvent, IsAtLeastSkl) { + Mock module(this->device, nullptr); + Mock<::L0::Kernel> kernel; + + ze_result_t returnValue; + ze_event_pool_desc_t eventPoolDesc = {}; + eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE; + eventPoolDesc.count = 1; + + kernel.module = &module; + kernel.descriptor.kernelAttributes.flags.usesPrintf = false; + + ze_event_desc_t eventDesc = {}; + eventDesc.index = 0; + + auto eventPool = std::unique_ptr(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue)); + + CmdListKernelLaunchParams launchParams = {}; + launchParams.isCooperative = false; + auto event = std::unique_ptr(Event::create(eventPool.get(), &eventDesc, device)); + + ze_group_count_t groupCount{1, 1, 1}; + + auto pCommandList = std::make_unique>>(); + pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u); + + auto result = pCommandList->appendLaunchKernelWithParams(&kernel, &groupCount, event.get(), launchParams); + EXPECT_EQ(ZE_RESULT_SUCCESS, result); + + ASSERT_EQ(nullptr, event->getKernelForPrintf()); +} + HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelIndirectThenKernelIsExecutedOnImmediateCmdQ, IsAtLeastSkl) { createKernel(); const ze_command_queue_desc_t desc = {}; diff --git a/level_zero/core/test/unit_tests/sources/event/test_event.cpp b/level_zero/core/test/unit_tests/sources/event/test_event.cpp index a22667f31d..e85b9a8835 100644 --- a/level_zero/core/test/unit_tests/sources/event/test_event.cpp +++ 
b/level_zero/core/test/unit_tests/sources/event/test_event.cpp @@ -28,6 +28,8 @@ #include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h" #include "level_zero/core/test/unit_tests/mocks/mock_device.h" #include "level_zero/core/test/unit_tests/mocks/mock_event.h" +#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h" +#include "level_zero/core/test/unit_tests/mocks/mock_module.h" #include #include @@ -2941,6 +2943,36 @@ TEST_F(EventTests, GivenResetAllPacketsFalseWhenResetPacketsThenKernelCountAndPa EXPECT_EQ(event->gpuEndTimestamp, 0u); } +TEST_F(EventTests, givenCallToEventQueryStatusWithKernelPointerReturnsCounter) { + auto event = std::make_unique(eventPool.get(), 1u, device); + Mock mockModule(this->device, nullptr); + Mock mockKernel; + mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true; + mockKernel.module = &mockModule; + + event->setKernelForPrintf(&mockKernel); + EXPECT_NE(nullptr, event->getKernelForPrintf()); + + constexpr uint64_t timeout = std::numeric_limits::max(); + event->hostSynchronize(timeout); + EXPECT_EQ(1u, mockKernel.printPrintfOutputCalledTimes); +} + +TEST_F(EventTests, givenCallToEventQueryStatusWithNullKernelPointerReturnsCounter) { + auto event = std::make_unique(eventPool.get(), 1u, device); + Mock mockModule(this->device, nullptr); + Mock mockKernel; + mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true; + mockKernel.module = &mockModule; + + event->setKernelForPrintf(nullptr); + EXPECT_EQ(nullptr, event->getKernelForPrintf()); + + constexpr uint64_t timeout = std::numeric_limits::max(); + event->hostSynchronize(timeout); + EXPECT_EQ(0u, mockKernel.printPrintfOutputCalledTimes); +} + TEST_F(EventSynchronizeTest, whenEventSetCsrThenCorrectCsrSet) { auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver; const auto mockCsr = std::make_unique(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());