Flush printf from kernel during event synchronize

When a kernel that has a printf buffer is appended to a command list with a
signal event, its printf buffer is now flushed when host synchronize is called on that event.

Related-To: LOCI-3681

Signed-off-by: Zhang, Winston <winston.zhang@intel.com>
This commit is contained in:
Zhang, Winston 2023-02-21 23:17:05 +00:00 committed by Compute-Runtime-Automation
parent 494ef4129e
commit ddfd2b0ac2
7 changed files with 176 additions and 0 deletions

View File

@ -361,6 +361,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelIndirect(ze_
Event *event = nullptr;
if (hEvent) {
event = Event::fromHandle(hEvent);
if (Kernel::fromHandle(kernelHandle)->getPrintfBufferAllocation() != nullptr) {
event->setKernelForPrintf(Kernel::fromHandle(kernelHandle));
}
launchParams.isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
}

View File

@ -228,6 +228,10 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
additionalCommands.pop_front();
}
if (event != nullptr && kernel->getPrintfBufferAllocation() != nullptr) {
event->setKernelForPrintf(kernel);
}
return ZE_RESULT_SUCCESS;
}

View File

@ -191,6 +191,9 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
bool isHostSignalScopeEvent = launchParams.isHostSignalScopeEvent;
Event *compactEvent = nullptr;
if (event) {
if (kernel->getPrintfBufferAllocation() != nullptr) {
event->setKernelForPrintf(kernel);
}
isHostSignalScopeEvent = event->isSignalScope(ZE_EVENT_SCOPE_FLAG_HOST);
if (compactL3FlushEvent(getDcFlushRequired(event->isSignalScope()))) {
compactEvent = event;

View File

@ -38,6 +38,7 @@ struct Context;
struct DriverHandle;
struct DriverHandleImp;
struct Device;
struct Kernel;
#pragma pack(1)
struct IpcEventPoolData {
@ -178,6 +179,12 @@ struct Event : _ze_event_handle_t {
uint32_t getMaxKernelCount() const {
return maxKernelCount;
}
// Records the kernel associated with this event for printf handling.
// The pointer is stored as-is in kernelWithPrintf; passing nullptr clears it.
void setKernelForPrintf(Kernel *inputKernelPtr) {
    this->kernelWithPrintf = inputKernelPtr;
}
// Returns the kernel recorded via setKernelForPrintf(), or nullptr when none
// is recorded (kernelWithPrintf defaults to nullptr).
// Const-qualified because it only reads state, matching the other read-only
// accessors of this struct; the returned Kernel itself stays mutable.
Kernel *getKernelForPrintf() const {
    return kernelWithPrintf;
}
bool isSignalScope() const {
return !!signalScope;
@ -221,6 +228,7 @@ struct Event : _ze_event_handle_t {
void *hostAddress = nullptr;
Device *device = nullptr;
EventPool *eventPool = nullptr;
Kernel *kernelWithPrintf = nullptr;
uint32_t maxKernelCount = 0;
uint32_t kernelCount = 1u;

View File

@ -13,6 +13,7 @@
#include "level_zero/core/source/event/event_imp.h"
#include "level_zero/core/source/gfx_core_helpers/l0_gfx_core_helper.h"
#include "level_zero/core/source/kernel/kernel.h"
#include "level_zero/tools/source/metrics/metric.h"
namespace L0 {
@ -308,6 +309,10 @@ ze_result_t EventImp<TagSizeT>::hostSynchronize(uint64_t timeout) {
while (true) {
ret = queryStatus();
if (ret == ZE_RESULT_SUCCESS) {
if (this->getKernelForPrintf() != nullptr) {
static_cast<Kernel *>(this->getKernelForPrintf())->printPrintfOutput(true);
this->setKernelForPrintf(nullptr);
}
return ret;
}

View File

@ -26,6 +26,7 @@
#include "level_zero/core/test/unit_tests/fixtures/multi_tile_fixture.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_cmdqueue.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
namespace L0 {
@ -250,6 +251,126 @@ HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingL
ASSERT_EQ(ZE_RESULT_ERROR_INVALID_ARGUMENT, result);
}
// Verifies that appendLaunchKernelIndirect records the launched kernel on the
// signal event when the kernel has a printf buffer (usesPrintf is set and
// createPrintfBuffer() has been called).
HWTEST2_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhenAppendingLaunchKernelIndirectThenKernelIsStoredOnEvent, IsAtLeastSkl) {
    Mock<Module> module(this->device, nullptr);
    Mock<::L0::Kernel> kernel;
    // Initialized so the variable never holds an indeterminate value.
    ze_result_t returnValue = ZE_RESULT_SUCCESS;
    std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, returnValue));
    // Fail fast instead of dereferencing a null command list below.
    ASSERT_NE(nullptr, commandList.get());

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;

    kernel.module = &module;
    kernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    kernel.createPrintfBuffer();

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    ASSERT_NE(nullptr, eventPool.get());
    auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    ze_group_count_t groupCount{1, 1, 1};
    auto result = commandList->appendLaunchKernelIndirect(kernel.toHandle(), &groupCount, event->toHandle(), 0, nullptr, false);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Stronger than a non-null check: the event must reference the very kernel
    // that was launched.
    EXPECT_EQ(static_cast<::L0::Kernel *>(&kernel), event->getKernelForPrintf());
}
// Verifies that appendLaunchKernelIndirect leaves the signal event without a
// recorded kernel when the kernel does not use printf and no printf buffer
// was created for it.
HWTEST2_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenAppendingLaunchKernelIndirectThenKernelIsNotStoredOnEvent, IsAtLeastSkl) {
    // Kernel without printf usage; createPrintfBuffer() is deliberately not called.
    Mock<Module> mockModule(this->device, nullptr);
    Mock<::L0::Kernel> nonPrintfKernel;
    nonPrintfKernel.module = &mockModule;
    nonPrintfKernel.descriptor.kernelAttributes.flags.usesPrintf = false;

    ze_result_t createStatus;
    std::unique_ptr<L0::CommandList> cmdList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::RenderCompute, 0u, createStatus));

    // Host-visible pool holding the single signal event.
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    std::unique_ptr<EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, createStatus));

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    std::unique_ptr<Event> signalEvent(Event::create<typename FamilyType::TimestampPacketType>(pool.get(), &signalEventDesc, device));

    ze_group_count_t dispatchSize{1, 1, 1};
    const auto appendStatus = cmdList->appendLaunchKernelIndirect(nonPrintfKernel.toHandle(), &dispatchSize, signalEvent->toHandle(), 0, nullptr, false);

    EXPECT_EQ(ZE_RESULT_SUCCESS, appendStatus);
    ASSERT_EQ(nullptr, signalEvent->getKernelForPrintf());
}
// Verifies that appendLaunchKernelWithParams records the launched kernel on
// the signal event when the kernel has a printf buffer (usesPrintf is set and
// createPrintfBuffer() has been called).
HWTEST2_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhenAppendingLaunchKernelWithParamThenKernelIsStoredOnEvent, IsAtLeastSkl) {
    Mock<Module> module(this->device, nullptr);
    Mock<::L0::Kernel> kernel;
    // Initialized so the variable never holds an indeterminate value.
    ze_result_t returnValue = ZE_RESULT_SUCCESS;

    ze_event_pool_desc_t eventPoolDesc = {};
    eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    eventPoolDesc.count = 1;

    kernel.module = &module;
    kernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    kernel.createPrintfBuffer();

    ze_event_desc_t eventDesc = {};
    eventDesc.index = 0;

    auto eventPool = std::unique_ptr<EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
    // Fail fast instead of handing a null pool to Event::create.
    ASSERT_NE(nullptr, eventPool.get());

    CmdListKernelLaunchParams launchParams = {};
    launchParams.isCooperative = false;

    auto event = std::unique_ptr<Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
    ASSERT_NE(nullptr, event.get());

    ze_group_count_t groupCount{1, 1, 1};
    auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    pCommandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
    auto result = pCommandList->appendLaunchKernelWithParams(&kernel, &groupCount, event.get(), launchParams);
    EXPECT_EQ(ZE_RESULT_SUCCESS, result);

    // Stronger than a non-null check: the event must reference the very kernel
    // that was launched.
    EXPECT_EQ(static_cast<::L0::Kernel *>(&kernel), event->getKernelForPrintf());
}
// Verifies that appendLaunchKernelWithParams leaves the signal event without a
// recorded kernel when the kernel does not use printf and no printf buffer
// was created for it.
HWTEST2_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenAppendingLaunchKernelWithParamThenKernelIsNotStoredOnEvent, IsAtLeastSkl) {
    // Kernel without printf usage; createPrintfBuffer() is deliberately not called.
    Mock<Module> mockModule(this->device, nullptr);
    Mock<::L0::Kernel> nonPrintfKernel;
    nonPrintfKernel.module = &mockModule;
    nonPrintfKernel.descriptor.kernelAttributes.flags.usesPrintf = false;

    // Host-visible pool holding the single signal event.
    ze_result_t createStatus;
    ze_event_pool_desc_t poolDesc = {};
    poolDesc.count = 1;
    poolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
    std::unique_ptr<EventPool> pool(EventPool::create(driverHandle.get(), context, 0, nullptr, &poolDesc, createStatus));

    ze_event_desc_t signalEventDesc = {};
    signalEventDesc.index = 0;
    std::unique_ptr<Event> signalEvent(Event::create<typename FamilyType::TimestampPacketType>(pool.get(), &signalEventDesc, device));

    auto cmdList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
    cmdList->initialize(device, NEO::EngineGroupType::Compute, 0u);

    CmdListKernelLaunchParams params = {};
    params.isCooperative = false;
    ze_group_count_t dispatchSize{1, 1, 1};
    const auto appendStatus = cmdList->appendLaunchKernelWithParams(&nonPrintfKernel, &dispatchSize, signalEvent.get(), params);

    EXPECT_EQ(ZE_RESULT_SUCCESS, appendStatus);
    ASSERT_EQ(nullptr, signalEvent->getKernelForPrintf());
}
HWTEST2_F(CommandListAppendLaunchKernel, givenImmediateCommandListWhenAppendingLaunchKernelIndirectThenKernelIsExecutedOnImmediateCmdQ, IsAtLeastSkl) {
createKernel();
const ze_command_queue_desc_t desc = {};

View File

@ -28,6 +28,8 @@
#include "level_zero/core/test/unit_tests/mocks/mock_cmdlist.h"
#include "level_zero/core/test/unit_tests/mocks/mock_device.h"
#include "level_zero/core/test/unit_tests/mocks/mock_event.h"
#include "level_zero/core/test/unit_tests/mocks/mock_kernel.h"
#include "level_zero/core/test/unit_tests/mocks/mock_module.h"
#include <algorithm>
#include <atomic>
@ -2941,6 +2943,36 @@ TEST_F(EventTests, GivenResetAllPacketsFalseWhenResetPacketsThenKernelCountAndPa
EXPECT_EQ(event->gpuEndTimestamp, 0u);
}
// Verifies that hostSynchronize on an event carrying a kernel pointer calls
// the kernel's printPrintfOutput exactly once and then clears the pointer.
TEST_F(EventTests, givenCallToEventQueryStatusWithKernelPointerReturnsCounter) {
    auto event = std::make_unique<MockEventCompletion>(eventPool.get(), 1u, device);
    Mock<Module> mockModule(this->device, nullptr);
    Mock<Kernel> mockKernel;
    mockKernel.descriptor.kernelAttributes.flags.usesPrintf = true;
    mockKernel.module = &mockModule;

    event->setKernelForPrintf(&mockKernel);
    EXPECT_NE(nullptr, event->getKernelForPrintf());

    constexpr uint64_t timeout = std::numeric_limits<std::uint64_t>::max();
    // The printf flush happens on the successful-completion path, so the call
    // itself is expected to report success.
    EXPECT_EQ(ZE_RESULT_SUCCESS, event->hostSynchronize(timeout));
    EXPECT_EQ(1u, mockKernel.printPrintfOutputCalledTimes);

    // hostSynchronize resets the stored kernel once the output was flushed.
    EXPECT_EQ(nullptr, event->getKernelForPrintf());
}
// Verifies that hostSynchronize on an event with no recorded kernel does not
// call printPrintfOutput on a kernel that was never attached to the event.
TEST_F(EventTests, givenCallToEventQueryStatusWithNullKernelPointerReturnsCounter) {
    auto completionEvent = std::make_unique<MockEventCompletion>(eventPool.get(), 1u, device);

    // This kernel is configured for printf but intentionally never attached.
    Mock<Module> moduleMock(this->device, nullptr);
    Mock<Kernel> unattachedKernel;
    unattachedKernel.module = &moduleMock;
    unattachedKernel.descriptor.kernelAttributes.flags.usesPrintf = true;

    completionEvent->setKernelForPrintf(nullptr);
    EXPECT_EQ(nullptr, completionEvent->getKernelForPrintf());

    constexpr uint64_t waitForever = std::numeric_limits<std::uint64_t>::max();
    completionEvent->hostSynchronize(waitForever);

    EXPECT_EQ(0u, unattachedKernel.printPrintfOutputCalledTimes);
}
TEST_F(EventSynchronizeTest, whenEventSetCsrThenCorrectCsrSet) {
auto defaultCsr = neoDevice->getDefaultEngine().commandStreamReceiver;
const auto mockCsr = std::make_unique<MockCommandStreamReceiver>(*neoDevice->getExecutionEnvironment(), 0, neoDevice->getDeviceBitfield());