fix: store kernel in cmdlist when printf buffer is created

Resolves: NEO-14993

Signed-off-by: Mateusz Hoppe <mateusz.hoppe@intel.com>
This commit is contained in:
Mateusz Hoppe
2025-05-20 12:35:33 +00:00
committed by Compute-Runtime-Automation
parent 0f8ee57f98
commit b01ab1176d
6 changed files with 133 additions and 16 deletions

View File

@@ -273,7 +273,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
commandContainer.addToResidencyContainer(resource);
}
if (kernelImmutableData->getDescriptor().kernelAttributes.flags.usesPrintf) {
if (kernelImp->getPrintfBufferAllocation() != nullptr) {
storePrintfKernel(kernel);
}

View File

@@ -556,7 +556,7 @@ ze_result_t CommandListCoreFamily<gfxCoreFamily>::appendLaunchKernelWithParams(K
// Store PrintfBuffer from a kernel
{
if (kernelDescriptor.kernelAttributes.flags.usesPrintf) {
if (kernelImp->getPrintfBufferAllocation() != nullptr) {
storePrintfKernel(kernel);
}
}

View File

@@ -195,6 +195,8 @@ const char *printfKernelSource = R"===(
#define MACRO_STR1 "string with tab(\\t) new line(\\n):"
#define MACRO_STR2 "using tab \tand new line \nin this string"
void printf_function();
__kernel void printf_kernel(char byteValue, short shortValue, int intValue, long longValue) {
printf("byte = %hhd\nshort = %hd\nint = %d\nlong = %ld", byteValue, shortValue, intValue, longValue);
}
@@ -215,6 +217,18 @@ __kernel void print_macros() {
printf("%s\n%s", MACRO_STR1, MACRO_STR2);
}
__kernel void print_from_function_kernel() {
printf_function();
}
)===";
const char *printfFunctionSource = R"===(
void printf_function() {
printf("test function\n");
}
)===";
const char *readNV12Module = R"===(

View File

@@ -1,5 +1,5 @@
/*
* Copyright (C) 2021-2024 Intel Corporation
* Copyright (C) 2021-2025 Intel Corporation
*
* SPDX-License-Identifier: MIT
*
@@ -28,6 +28,7 @@ extern const char *scratchKernelSrc;
extern const char *scratchKernelBuildOptions;
extern const char *printfKernelSource;
extern const char *printfFunctionSource;
extern const char *readNV12Module;

View File

@@ -27,19 +27,20 @@
#include <unistd.h>
#endif
static constexpr std::array<const char *, 4> kernelNames = {"printf_kernel",
static constexpr std::array<const char *, 5> kernelNames = {"printf_kernel",
"printf_kernel1",
"print_string",
"print_macros"};
"print_macros",
"print_from_function_kernel"};
enum class PrintfExecutionMode : uint32_t {
commandQueue,
immSyncCmdList
};
void createModule(const ze_context_handle_t context, const ze_device_handle_t device, ze_module_handle_t &module) {
void createModule(const ze_context_handle_t context, const ze_device_handle_t device, const char *source, ze_module_handle_t &module) {
std::string buildLog;
auto spirV = LevelZeroBlackBoxTests::compileToSpirV(LevelZeroBlackBoxTests::printfKernelSource, "", buildLog);
auto spirV = LevelZeroBlackBoxTests::compileToSpirV(source, "", buildLog);
LevelZeroBlackBoxTests::printBuildLog(buildLog);
SUCCESS_OR_TERMINATE((0 == spirV.size()));
@@ -47,7 +48,7 @@ void createModule(const ze_context_handle_t context, const ze_device_handle_t de
moduleDesc.format = ZE_MODULE_FORMAT_IL_SPIRV;
moduleDesc.pInputModule = spirV.data();
moduleDesc.inputSize = spirV.size();
moduleDesc.pBuildFlags = "";
moduleDesc.pBuildFlags = "-library-compilation";
SUCCESS_OR_TERMINATE(zeModuleCreate(context, device, &moduleDesc, &module, nullptr));
}
@@ -90,7 +91,7 @@ void runPrintfKernel(const ze_module_handle_t &module, const ze_kernel_handle_t
dispatchTraits.groupCountX = 10u;
dispatchTraits.groupCountY = 1u;
dispatchTraits.groupCountZ = 1u;
} else if (id == 2 || id == 3) {
} else if (id == 2 || id == 3 || id == 4) {
SUCCESS_OR_TERMINATE(zeKernelSetGroupSize(kernel, 1U, 1U, 1U));
dispatchTraits.groupCountX = 1u;
@@ -103,16 +104,17 @@ void runPrintfKernel(const ze_module_handle_t &module, const ze_kernel_handle_t
SUCCESS_OR_TERMINATE(commandHandler.synchronize());
}
void cleanUp(ze_context_handle_t context, ze_module_handle_t module, ze_kernel_handle_t *kernels, uint32_t kernelsCount) {
void cleanUp(ze_context_handle_t context, ze_module_handle_t module, ze_module_handle_t module2, ze_kernel_handle_t *kernels, uint32_t kernelsCount) {
for (uint32_t i = 0; i < kernelsCount; i++) {
SUCCESS_OR_TERMINATE(zeKernelDestroy(kernels[i]));
}
SUCCESS_OR_TERMINATE(zeModuleDestroy(module));
SUCCESS_OR_TERMINATE(zeModuleDestroy(module2));
SUCCESS_OR_TERMINATE(zeContextDestroy(context));
}
int main(int argc, char *argv[]) {
constexpr uint32_t kernelsCount = 4;
constexpr uint32_t kernelsCount = 5;
LevelZeroBlackBoxTests::verbose = LevelZeroBlackBoxTests::isVerbose(argc, argv);
auto pid = getpid();
std::stringstream filenameWithPid;
@@ -120,7 +122,7 @@ int main(int argc, char *argv[]) {
auto fileNameStr = filenameWithPid.str();
auto *fileName = fileNameStr.c_str();
bool validatePrintfOutput = true;
bool validatePrintfOutput = false;
bool printfValidated = false;
int stdoutFd = -1;
@@ -133,7 +135,25 @@ int main(int argc, char *argv[]) {
LevelZeroBlackBoxTests::printDeviceProperties(deviceProperties);
ze_module_handle_t module = nullptr;
createModule(context, device, module);
ze_module_handle_t module2 = nullptr;
createModule(context, device, LevelZeroBlackBoxTests::printfKernelSource, module);
createModule(context, device, LevelZeroBlackBoxTests::printfFunctionSource, module2);
{
ze_module_handle_t modulesToLink[] = {module, module2};
ze_module_build_log_handle_t dynLinkLog;
SUCCESS_OR_TERMINATE(zeModuleDynamicLink(2, modulesToLink, &dynLinkLog));
size_t buildLogSize = 0;
SUCCESS_OR_TERMINATE(zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, nullptr));
char *logBuffer = new char[buildLogSize]();
SUCCESS_OR_TERMINATE(zeModuleBuildLogGetString(dynLinkLog, &buildLogSize, logBuffer));
if (LevelZeroBlackBoxTests::verbose) {
std::cout << "Dynamically linked modules\n";
std::cout << logBuffer << "\n";
}
}
ze_kernel_handle_t kernels[kernelsCount] = {};
for (uint32_t i = 0; i < kernelsCount; i++) {
createKernel(module, kernels[i], kernelNames[i]);
@@ -144,7 +164,8 @@ int main(int argc, char *argv[]) {
"id == 0\nid == 0\nid == 0\nid == 0\nid == 0\n"
"id == 0\nid == 0\nid == 0\nid == 0\nid == 0\n",
"string with tab(\\t) new line(\\n):\nusing tab \tand new line \nin this string",
"string with tab(\\t) new line(\\n):\nusing tab \tand new line \nin this string"};
"string with tab(\\t) new line(\\n):\nusing tab \tand new line \nin this string",
"test_function\n"};
PrintfExecutionMode executionModes[] = {PrintfExecutionMode::commandQueue, PrintfExecutionMode::immSyncCmdList};
std::string executionModeNames[] = {"Asynchronous Command Queue", "Synchronous Immediate Command List"};
@@ -205,14 +226,14 @@ int main(int argc, char *argv[]) {
}
if (validatePrintfOutput && !printfValidated) {
cleanUp(context, module, kernels, kernelsCount);
cleanUp(context, module, module2, kernels, kernelsCount);
std::cerr << "\nZello Printf FAILED " << std::endl;
return -1;
}
}
}
cleanUp(context, module, kernels, kernelsCount);
cleanUp(context, module, module2, kernels, kernelsCount);
std::cout << "\nZello Printf PASSED " << std::endl;
return 0;

View File

@@ -292,6 +292,44 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhen
ASSERT_FALSE(event->getKernelForPrintf().expired());
}
HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelWithPrintfBufferCreatedForStackCallsWhenAppendingLaunchKernelIndirectThenKernelIsStoredOnEvent) {
Mock<Module> module(this->device, nullptr);
auto kernel = new Mock<::L0::KernelImp>{};
static_cast<ModuleImp *>(&module)->getPrintfKernelContainer().push_back(std::shared_ptr<Mock<::L0::KernelImp>>{kernel});
ze_result_t returnValue;
std::unique_ptr<L0::CommandList> commandList(L0::CommandList::create(productFamily, device, NEO::EngineGroupType::renderCompute, 0u, returnValue, false));
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
kernel->descriptor.kernelAttributes.flags.useStackCalls = true;
kernel->pImplicitArgs.reset(new ImplicitArgs());
kernel->pImplicitArgs->v0.header.structVersion = 0;
kernel->pImplicitArgs->v0.header.structSize = ImplicitArgsV0::getSize();
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(*kernel->immutableData.kernelDescriptor);
kernel->createPrintfBuffer();
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
auto event = std::unique_ptr<L0::Event>(L0::Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
const auto &printfContainer = commandList->getPrintfKernelContainer();
EXPECT_EQ(0u, printfContainer.size());
ze_group_count_t groupCount{1, 1, 1};
auto result = commandList->appendLaunchKernelIndirect(kernel->toHandle(), groupCount, event->toHandle(), 0, nullptr, false);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, printfContainer.size());
ASSERT_FALSE(event->getKernelForPrintf().expired());
}
HWTEST_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenAppendingLaunchKernelIndirectThenKernelIsNotStoredOnEvent) {
Mock<Module> module(this->device, nullptr);
auto kernel = new Mock<::L0::KernelImp>{};
@@ -354,6 +392,49 @@ HWTEST_F(CommandListAppendLaunchKernel, givenNonemptyAllocPrintfBufferKernelWhen
ASSERT_FALSE(event->getKernelForPrintf().expired());
}
HWTEST_F(CommandListAppendLaunchKernel, givenNonPrintfKernelAndPrintfBufferForStackCallsWhenAppendingLaunchKernelWithParamThenKernelIsStoredOnEvent) {
Mock<Module> module(this->device, nullptr);
auto kernel = new Mock<::L0::KernelImp>{};
static_cast<ModuleImp *>(&module)->getPrintfKernelContainer().push_back(std::shared_ptr<Mock<::L0::KernelImp>>{kernel});
ze_result_t returnValue;
ze_event_pool_desc_t eventPoolDesc = {};
eventPoolDesc.flags = ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
eventPoolDesc.count = 1;
kernel->module = &module;
kernel->descriptor.kernelAttributes.flags.usesPrintf = false;
kernel->descriptor.kernelAttributes.flags.useStackCalls = true;
kernel->pImplicitArgs.reset(new ImplicitArgs());
kernel->pImplicitArgs->v0.header.structVersion = 0;
kernel->pImplicitArgs->v0.header.structSize = ImplicitArgsV0::getSize();
UnitTestHelper<FamilyType>::adjustKernelDescriptorForImplicitArgs(*kernel->immutableData.kernelDescriptor);
kernel->createPrintfBuffer();
ze_event_desc_t eventDesc = {};
eventDesc.index = 0;
auto eventPool = std::unique_ptr<L0::EventPool>(EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, returnValue));
CmdListKernelLaunchParams launchParams = {};
launchParams.isCooperative = false;
auto event = std::unique_ptr<L0::Event>(Event::create<typename FamilyType::TimestampPacketType>(eventPool.get(), &eventDesc, device));
ze_group_count_t groupCount{1, 1, 1};
auto pCommandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<FamilyType::gfxCoreFamily>>>();
pCommandList->initialize(device, NEO::EngineGroupType::compute, 0u);
const auto &printfContainer = pCommandList->getPrintfKernelContainer();
EXPECT_EQ(0u, printfContainer.size());
auto result = pCommandList->appendLaunchKernelWithParams(kernel, groupCount, event.get(), launchParams);
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
EXPECT_NE(0u, printfContainer.size());
ASSERT_FALSE(event->getKernelForPrintf().expired());
}
HWTEST_F(CommandListAppendLaunchKernel, givenEmptyAllocPrintfBufferKernelWhenAppendingLaunchKernelWithParamThenKernelIsNotStoredOnEvent) {
Mock<Module> module(this->device, nullptr);
auto kernel = new Mock<::L0::KernelImp>{};